1use std::borrow::{Borrow, Cow};
2use std::cmp;
3use std::error::Error;
4use std::fmt;
5use std::io;
6use std::io::Write;
7use std::str;
8
9use crate::errors::GbParserError;
10use crate::reader::parse_location;
11use crate::dna::revcomp;
12
13#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
14#[derive(Debug, PartialEq, Clone)]
15pub struct Date {
17 year: i32,
18 month: u32,
19 day: u32,
20}
21
22#[derive(Debug, PartialEq, Clone)]
23pub struct DateError;
24
25impl Date {
26 pub fn from_ymd(year: i32, month: u32, day: u32) -> Result<Date, DateError> {
29 if (1..=12).contains(&month) && (1..=31).contains(&day) {
30 Ok(Date { year, month, day })
31 } else {
32 Err(DateError)
33 }
34 }
35 pub fn year(&self) -> i32 {
36 self.year
37 }
38 pub fn month(&self) -> u32 {
39 self.month
40 }
41 pub fn day(&self) -> u32 {
42 self.day
43 }
44}
45
46impl fmt::Display for Date {
47 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
48 let month = match self.month {
49 1 => "JAN",
50 2 => "FEB",
51 3 => "MAR",
52 4 => "APR",
53 5 => "MAY",
54 6 => "JUN",
55 7 => "JUL",
56 8 => "AUG",
57 9 => "SEP",
58 10 => "OCT",
59 11 => "NOV",
60 12 => "DEC",
61 _ => unreachable!(),
62 };
63 write!(f, "{:02}-{}-{:04}", self.day, month, self.year)
64 }
65}
66
67#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
68#[derive(Debug, PartialEq, Clone, Copy)]
69pub enum GapLength {
70 Known(i64),
72 Unknown,
74 Unk100,
76}
77
78#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
79#[derive(Debug, PartialEq, Clone, Copy)]
80pub struct Before(pub bool);
81#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
82#[derive(Debug, PartialEq, Clone, Copy)]
83pub struct After(pub bool);
84
85#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
99#[derive(Debug, PartialEq, Clone)]
100pub enum Location {
101 Range((i64, Before), (i64, After)),
110 Between(i64, i64),
114 Complement(Box<Location>),
118 Join(Vec<Location>),
121 Order(Vec<Location>),
125 Bond(Vec<Location>),
126 OneOf(Vec<Location>),
127 External(String, Option<Box<Location>>),
128 Gap(GapLength),
129}
130
131#[derive(Debug, Error)]
132#[error("Not configured to fetch external sequences")]
133pub struct NoFetcherError;
134
135#[derive(Debug, Error)]
136pub enum LocationError {
137 #[error("Can't determine location due to ambiguity: {0}")]
138 Ambiguous(Location),
139 #[error("Can't resolve external location `{0}`: {1}")]
140 External(Location, #[source] Box<dyn Error>),
141 #[error("Recursion limit reached while processing: {0}")]
142 Recursion(Location),
143 #[error("Empty location list encountered")]
145 Empty,
146 #[error("Location refers to a location outside of the sequence: {0}")]
147 OutOfBounds(Location),
148}
149
150impl Location {
151 pub fn simple_range(a: i64, b: i64) -> Location {
153 Location::Range((a, Before(false)), (b, After(false)))
154 }
155
156 pub fn single(a: i64) -> Location {
157 Location::simple_range(a, a + 1)
158 }
159
160 pub fn find_bounds(&self) -> Result<(i64, i64), LocationError> {
163 use Location::*;
164 match *self {
165 Range((a, _), (b, _)) => Ok((a, b)),
166 Complement(ref location) => location.find_bounds(),
167 Join(ref locations) | Order(ref locations) => {
168 let first = locations.first().ok_or(LocationError::Empty)?;
169 let last = locations.last().unwrap();
170 let (start, _) = first.find_bounds()?;
171 let (_, end) = last.find_bounds()?;
172 Ok((start, end))
173 }
174 Between(a, b) => Ok((a, b + 1)),
175 | Bond(ref locations)
176 | OneOf(ref locations) => {
177 let (left, right): (Vec<_>, Vec<_>) = locations
178 .iter()
179 .flat_map(Location::find_bounds) .unzip();
181 let min = left.into_iter().min().ok_or(LocationError::Empty)?;
182 let max = right.into_iter().max().unwrap();
183 Ok((min, max))
184 }
185 ref p => Err(LocationError::Ambiguous(p.clone())),
186 }
187 }
188
189 fn transform<L, V>(self, loc: &L, val: &V) -> Result<Location, LocationError>
191 where
192 L: Fn(Location) -> Result<Location, LocationError>,
193 V: Fn(i64) -> Result<i64, LocationError>,
194 {
195 loc(self)?.transform_impl(loc, val)
196 }
197
198 fn transform_impl<L, V>(self, loc: &L, val: &V) -> Result<Location, LocationError>
199 where
200 L: Fn(Location) -> Result<Location, LocationError>,
201 V: Fn(i64) -> Result<i64, LocationError>,
202 {
203 use Location::*;
204 let t_vec = |ps: Vec<Location>| {
205 ps.into_iter()
206 .map(|p| loc(p)?.transform_impl(loc, val))
207 .collect::<Result<Vec<_>, _>>()
208 };
209 let res = match self {
210 Complement(p) => Complement(Box::new(loc(*p)?.transform_impl(loc, val)?)),
212 Order(ps) => Order(t_vec(ps)?),
213 Bond(ps) => Bond(t_vec(ps)?),
214 OneOf(ps) => OneOf(t_vec(ps)?),
215 Join(ps) => Join(t_vec(ps)?),
216 Between(a, b) => Between(val(a)?, val(b)?),
218 Range((a, before), (b, after)) => Range((val(a)?, before), (val(b)?, after)),
219 External(..) => self,
221 Gap(..) => self,
222 };
223 Ok(res)
224 }
225
226 pub fn truncate(&self, start: i64, end: i64) -> Option<Location> {
230 use Location::*;
231 let filter = |ps: &[Location]| {
232 let res: Vec<_> = ps.iter().filter_map(|p| p.truncate(start, end)).collect();
233 if res.is_empty() {
234 None
235 } else {
236 Some(res)
237 }
238 };
239 match *self {
240 Between(a, b) => {
241 if (a >= start && a < end) || (b >= start && b < end) {
242 Some(self.clone())
243 } else {
244 None
245 }
246 }
247 Range((a, before), (b, after)) => {
248 match (a >= start && a < end, b > start && b <= end) {
249 (true, true) => Some(simplify_shallow(Range((a, before), (b, after))).unwrap()),
250 (true, false) => {
251 Some(simplify_shallow(Range((a, before), (end, After(false)))).unwrap())
252 } (false, true) => {
254 Some(simplify_shallow(Range((start, Before(false)), (b, after))).unwrap())
255 } (false, false) => {
257 if a <= start && b >= end {
259 Some(simplify_shallow(Location::simple_range(start, end)).unwrap()) } else {
261 None
262 }
263 }
264 }
265 }
266 Complement(ref a) => a.truncate(start, end).map(|a| Complement(Box::new(a))),
267 Join(ref ps) => filter(ps).map(|v| simplify_shallow(Join(v)).unwrap()),
268 OneOf(ref ps) => filter(ps).map(OneOf),
269 Bond(ref ps) => filter(ps).map(Bond),
270 Order(ref ps) => filter(ps).map(Order),
271 External(_, _) | Gap(_) => Some(self.clone()),
272 }
273 }
274
275 fn complement(self) -> Location {
276 match self {
277 Location::Complement(x) => *x,
278 x => Location::Complement(Box::new(x)),
279 }
280 }
281
282 pub fn to_gb_format(&self) -> String {
283 fn location_list(locations: &[Location]) -> String {
284 locations
285 .iter()
286 .map(Location::to_gb_format)
287 .collect::<Vec<_>>()
288 .join(",")
289 }
290 match *self {
291 Location::Between(a, b) => format!("{}^{}", a + 1, b + 1),
292 Location::Range((a, Before(before)), (b, After(after))) => {
293 if !before && !after && b == a + 1 {
294 format!("{}", a + 1)
295 } else {
296 let before = if before { "<" } else { "" };
297 let after = if after { ">" } else { "" };
298 format!("{}{}..{}{}", before, a + 1, after, b)
299 }
300 }
301 Location::Complement(ref p) => format!("complement({})", p.to_gb_format()),
302 Location::Join(ref locations) => format!("join({})", location_list(locations)),
303 Location::Order(ref locations) => format!("order({})", location_list(locations)),
304 Location::Bond(ref locations) => format!("bond({})", location_list(locations)),
305 Location::OneOf(ref locations) => format!("one-of({})", location_list(locations)),
306 Location::External(ref name, Some(ref location)) => {
307 format!("{}:{}", &name, location.to_gb_format())
308 }
309 Location::External(ref name, None) => format!("{}", name),
310 Location::Gap(GapLength::Known(length)) => format!("gap({})", length),
311 Location::Gap(GapLength::Unknown) => format!("gap()"),
312 Location::Gap(GapLength::Unk100) => format!("gap(unk100)"),
313 }
314 }
315
316 pub fn from_gb_format(s: &str) -> Result<Location, GbParserError> {
317 parse_location(s.as_bytes())
318 }
319}
320
321impl fmt::Display for Location {
322 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
323 write!(f, "{}", self.to_gb_format())
324 }
325}
326
327pub type Qualifier = (Cow<'static, str>, Option<String>);
329
330#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
334#[derive(Debug, PartialEq, Clone)]
335pub struct Feature {
336 pub kind: Cow<'static, str>,
337 pub location: Location,
338 pub qualifiers: Vec<Qualifier>,
339}
340
341impl Feature {
342 pub fn qualifier_values<'a>(&'a self, key: &'a str) -> impl Iterator<Item = &'a str> {
345 self.qualifiers
346 .iter()
347 .filter(move |&(k, _)| k == key)
348 .filter_map(|(_, v)| v.as_ref().map(String::as_str))
349 }
350}
351
352#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
353#[derive(Debug, PartialEq, Clone)]
354pub enum Topology {
355 Linear,
356 Circular,
357}
358
359impl fmt::Display for Topology {
360 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
361 let res = match *self {
362 Topology::Linear => "linear",
363 Topology::Circular => "circular",
364 };
365 write!(f, "{}", res)
366 }
367}
368
369#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
370#[derive(Debug, PartialEq, Clone)]
371pub struct Source {
372 pub source: String,
373 pub organism: Option<String>,
374}
375
376#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
377#[derive(Debug, PartialEq, Clone)]
378pub struct Reference {
379 pub description: String,
380 pub authors: Option<String>,
381 pub consortium: Option<String>,
382 pub title: String,
383 pub journal: Option<String>,
384 pub pubmed: Option<String>,
385 pub remark: Option<String>,
386}
387
388#[doc(hidden)]
391pub const REASONABLE_SEQ_LEN: usize = 500 * 1000 * 1000;
392
393#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
394#[derive(Debug, PartialEq, Clone)]
395pub struct Seq {
396 pub name: Option<String>,
398 pub topology: Topology,
400 pub date: Option<Date>,
402 pub len: Option<usize>,
407 pub molecule_type: Option<String>,
409 pub division: String,
410 pub definition: Option<String>,
411 pub accession: Option<String>,
412 pub version: Option<String>,
413 pub source: Option<Source>,
414 pub dblink: Option<String>,
415 pub keywords: Option<String>,
416 pub references: Vec<Reference>,
417 pub comments: Vec<String>,
418 #[cfg_attr(feature = "serde", serde(with = "serde_bytes"))]
419 pub seq: Vec<u8>,
420 pub contig: Option<Location>,
421 pub features: Vec<Feature>,
422}
423
424impl Seq {
425 pub fn empty() -> Seq {
427 Seq {
428 seq: vec![],
429 contig: None,
430 definition: None,
431 accession: None,
432 molecule_type: None,
433 division: String::from("UNK"),
434 dblink: None,
435 keywords: None,
436 source: None,
437 version: None,
438 comments: vec![],
439 references: vec![],
440 name: None,
441 topology: Topology::Linear,
442 date: None,
443 len: None,
444 features: vec![],
445 }
446 }
447
448 pub fn is_circular(&self) -> bool {
449 match self.topology {
450 Topology::Circular => true,
451 Topology::Linear => false,
452 }
453 }
454
455 pub fn len(&self) -> i64 {
461 if let Some(len) = self.len {
462 assert!(self.seq.is_empty() || len == self.seq.len());
463 len as i64
464 } else {
465 self.seq.len() as i64
466 }
467 }
468
469 pub fn unwrap_range(&self, start: i64, end: i64) -> (i64, i64) {
482 let len = self.len();
483 match self.topology {
484 Topology::Linear => {
485 assert!(start <= end);
486 assert!(start >= 0 && start < len && end >= 0 && end <= len);
487 (start, end)
488 }
489 Topology::Circular => {
490 let mut start = start;
491 let mut end = end;
492
493 if start >= end {
494 end += len;
495 }
496
497 while start >= len {
498 start -= len;
499 end -= len;
500 }
501
502 while start < 0 {
503 start += len;
504 end += len;
505 }
506
507 assert!(start >= 0 && start < len);
508 assert!(end > start);
509
510 (start, end)
511 }
512 }
513 }
514 pub fn range_to_location(&self, start: i64, end: i64) -> Location {
517 let res = match self.topology {
518 Topology::Linear => {
519 assert!(end > start);
520 assert!(start < self.len() && end <= self.len());
521 Location::simple_range(start, end)
522 }
523 Topology::Circular => {
524 let (start, end) = self.unwrap_range(start, end);
525 if end > self.len() {
526 assert!(end < self.len() * 2, "Range wraps around more than once!");
527 Location::Join(vec![
528 Location::simple_range(start, self.len()),
529 Location::simple_range(0, end - self.len()),
530 ])
531 } else {
532 Location::simple_range(start, end)
533 }
534 }
535 };
536 simplify(res).expect("invalid Location not possible here")
537 }
538
539 pub fn wrap_location(&self, p: Location) -> Result<Location, LocationError> {
542 use Location::*;
543 let res = p
544 .transform(
545 &|p| {
546 let res = match p {
547 Range((mut a, before), (mut b, after)) => {
548 while a >= self.len() {
549 a -= self.len();
550 b -= self.len();
551 }
552 if b <= self.len() {
553 Range((a, before), (b, after))
554 } else {
555 Join(vec![
556 Range((a, before), (self.len(), After(false))),
557 Range((0, Before(false)), (b - self.len(), after)),
558 ])
559 }
560 }
561 p => p,
562 };
563 Ok(res)
564 },
565 &Ok,
566 )
567 .unwrap();
568 simplify(res)
569 }
570
571 pub fn relocate_feature(&self, f: Feature, shift: i64) -> Result<Feature, LocationError> {
575 let res = Feature {
576 location: self.relocate_location(f.location, shift)?,
577 ..f
578 };
579 Ok(res)
580 }
581
582 pub fn relocate_location(
586 &self,
587 p: Location,
588 mut shift: i64,
589 ) -> Result<Location, LocationError> {
590 if self.is_circular() {
591 while shift < 0 {
592 shift += self.len();
593 }
594 while shift >= self.len() {
595 shift -= self.len();
596 }
597 let moved = p.transform(&Ok, &|v| Ok(v + shift)).unwrap(); self.wrap_location(moved)
599 } else {
600 Ok(p.transform(&Ok, &|v| Ok(v + shift)).unwrap())
601 }
602 }
603
604 fn revcomp_location(&self, p: Location) -> Result<Location, LocationError> {
606 let p = p
607 .transform(
608 &|mut p| {
609 match p {
610 Location::Join(ref mut locations)
611 | Location::Order(ref mut locations)
612 | Location::Bond(ref mut locations)
613 | Location::OneOf(ref mut locations) => {
614 locations.reverse();
615 }
616 _ => (),
617 };
618 let p = match p {
619 Location::Range((a, Before(before)), (b, After(after))) => {
620 Location::Range((b, Before(after)), (a, After(before)))
621 }
622 Location::Between(a, b) => Location::Between(b + 1, a + 1),
623 p => p,
624 };
625 Ok(p)
626 },
627 &|v| Ok(self.len() - v),
628 )
629 .unwrap(); simplify(p).map(Location::complement)
631 }
632
633 pub fn revcomp_feature(&self, f: Feature) -> Result<Feature, LocationError> {
636 Ok(Feature {
637 location: self.revcomp_location(f.location)?,
638 ..f
639 })
640 }
641
642 pub fn revcomp(&self) -> Seq {
645 let mut features = Vec::with_capacity(self.features.len());
646 for f in &self.features {
647 match self.revcomp_feature(f.clone()) {
648 Ok(f) => features.push(f),
649 Err(e) => warn!("Encountered invalid feature location: {}", e),
650 }
651 }
652 Seq {
653 features,
654 seq: revcomp(&self.seq),
655 ..self.clone()
656 }
657 }
658
659 pub fn extract_range_seq(&self, start: i64, end: i64) -> Cow<[u8]> {
664 let len = self.len() as usize;
667 assert!(len == self.seq.len());
668 let (start, end) = self.unwrap_range(start, end);
669 let mut start = start as usize;
670 let mut end = end as usize;
671 if end <= len {
672 Cow::from(&self.seq[start..end])
673 } else {
674 assert!(self.is_circular());
675 let mut res = Vec::with_capacity(end - start);
676 while end != 0 {
677 let slice_end = cmp::min(end, len);
678 end -= slice_end;
679 res.extend(&self.seq[start..slice_end]);
680 start = 0;
681 }
682 Cow::from(res)
683 }
684 }
685
686 pub fn extract_range(&self, start: i64, end: i64) -> Seq {
690 let (start, end) = self.unwrap_range(start, end);
691 let mut shift = -start;
692 if self.is_circular() {
693 while shift < 0 {
694 shift += self.len();
695 }
696 while shift > self.len() {
697 shift -= self.len();
698 }
699 }
700 let features = self
701 .features
702 .iter()
703 .flat_map(
704 |f| match self.relocate_location(f.location.clone(), shift) {
705 Ok(l) => l.truncate(0, end - start).map(|location| Feature {
707 location,
708 ..f.clone()
709 }),
710 Err(e) => {
711 warn!("Skipping feature, can't process invalid location: {}", e);
712 None
713 }
714 },
715 )
716 .collect();
717 Seq {
718 features,
719 seq: self.extract_range_seq(start, end).into(),
720 ..Seq::empty()
721 }
722 }
723
724 pub fn extract_location(&self, l: &Location) -> Result<Vec<u8>, LocationError> {
730 self.extract_location_with_fetcher(l, |_| Err::<Seq, _>(Box::new(NoFetcherError)))
731 }
732
733 pub fn extract_location_with_fetcher<'a, F, S>(
739 &self,
740 l: &Location,
741 mut ext_fetcher: F,
742 ) -> Result<Vec<u8>, LocationError>
743 where
744 F: (FnMut(&str) -> Result<S, Box<dyn Error>>) + 'a,
745 S: Borrow<Seq> + 'a,
746 {
747 self.extract_location_impl(l, &mut ext_fetcher)
748 }
749
750 fn extract_location_impl<'a, F, S>(
751 &self,
752 l: &Location,
753 ext_fetcher: &mut F,
754 ) -> Result<Vec<u8>, LocationError>
755 where
756 F: (FnMut(&str) -> Result<S, Box<dyn Error>>) + 'a,
757 S: Borrow<Seq> + 'a,
758 {
759 let get_range = |from: i64, to: i64| -> Result<&[u8], LocationError> {
760 let usize_or = |a: i64| -> Result<usize, LocationError> {
761 if a < 0 {
762 Err(LocationError::OutOfBounds(l.clone()))
763 } else {
764 Ok(a as usize)
765 }
766 };
767 let s = self
768 .seq
769 .get(usize_or(from)?..usize_or(to)?)
770 .ok_or_else(|| LocationError::OutOfBounds(l.clone()))?;
771 Ok(s)
772 };
773 use Location::*;
774 let res = match *l {
775 Range((a, _), (b, _)) => get_range(a, b)?.into(),
776 Join(ref ls) => {
777 let mut res = Vec::new();
778 for l in ls {
779 res.extend_from_slice(&self.extract_location_impl(l, ext_fetcher)?);
780 }
781 res
782 }
783 Complement(ref l) => revcomp(&self.extract_location_impl(l, ext_fetcher)?),
784 External(ref name, ref ext_l) => {
785 let ext_seq =
786 ext_fetcher(name).map_err(|e| LocationError::External(l.clone(), e))?;
787 if let Some(ext_l) = ext_l {
788 ext_seq.borrow().extract_location_impl(ext_l, ext_fetcher)?
789 } else {
790 ext_seq.borrow().seq.clone()
791 }
792 }
793 _ => return Err(LocationError::Ambiguous(l.clone())),
794 };
795 Ok(res)
796 }
797
798 pub fn set_origin(&self, origin: i64) -> Seq {
800 assert!(self.is_circular());
801 assert!(origin < self.len());
802 let rotated = self.extract_range_seq(origin, origin);
803 Seq {
804 seq: rotated.into(),
805 features: self
806 .features
807 .iter()
808 .cloned()
809 .flat_map(|f| self.relocate_feature(f, -origin))
810 .collect(),
811 ..self.clone()
812 }
813 }
814
815 pub fn write<T: Write>(&self, file: T) -> io::Result<()> {
816 crate::writer::write(file, self)
817 }
818}
819
820fn merge_adjacent(ps: Vec<Location>) -> Vec<Location> {
822 use Location::*;
823 let mut res: Vec<Location> = Vec::with_capacity(ps.len());
824 for p in ps {
825 if let Some(last) = res.last_mut() {
826 match (&last, p) {
827 (Range(a, (ref b, After(false))), Range((c, Before(false)), d)) => {
828 if *b == c {
829 *last = Range(*a, d);
830 } else {
831 res.push(Range((c, Before(false)), d));
832 }
833 }
834 (_, p) => res.push(p),
835 }
836 } else {
837 res.push(p);
838 }
839 }
840 res
841}
842
843fn flatten_join(v: Vec<Location>) -> Vec<Location> {
844 let mut res = Vec::with_capacity(v.len());
845 for p in v {
846 match p {
847 Location::Join(xs) => res.extend(flatten_join(xs)),
848 p => res.push(p),
849 }
850 }
851 res
852}
853
854fn simplify(p: Location) -> Result<Location, LocationError> {
857 p.transform(&simplify_shallow, &Ok)
858}
859
860fn simplify_shallow(p: Location) -> Result<Location, LocationError> {
861 use Location::*;
862 match p {
863 Join(xs) => {
864 if xs.is_empty() {
865 return Err(LocationError::Empty);
866 }
867 let xs = flatten_join(xs);
868 let mut xs = merge_adjacent(xs);
869 assert!(!xs.is_empty());
870 if xs.len() == 1 {
871 Ok(simplify_shallow(xs.pop().unwrap())?)
874 } else {
875 if xs
877 .iter()
878 .all(|x| matches!(x, Complement(_)))
879 {
880 xs = xs.into_iter().rev().map(Location::complement).collect();
881 Ok(Join(xs).complement())
882 } else {
883 Ok(Join(xs))
884 }
885 }
886 }
887 p => Ok(p),
888 }
889}
890
891#[cfg(test)]
892mod test {
893 use super::*;
894 use crate::tests::init;
895
896 #[test]
897 fn test_merge_adj() {
898 use Location::*;
899 assert_eq!(
900 merge_adjacent(vec![
901 Location::simple_range(0, 4),
902 Location::simple_range(5, 8),
903 ]),
904 vec![Location::simple_range(0, 4), Location::simple_range(5, 8)]
905 );
906 assert_eq!(
907 merge_adjacent(vec![
908 Location::simple_range(0, 5),
909 Location::simple_range(5, 8),
910 ]),
911 vec![Location::simple_range(0, 8)]
912 );
913 assert_eq!(
914 merge_adjacent(vec![Location::simple_range(0, 5), Location::single(5)]),
915 vec![Location::simple_range(0, 6)]
916 );
917 assert_eq!(
918 merge_adjacent(vec![Location::single(0), Location::simple_range(1, 5)]),
919 vec![Location::simple_range(0, 5)]
920 );
921 assert_eq!(
922 merge_adjacent(vec![Location::single(0), Location::single(1)]),
923 vec![Location::simple_range(0, 2)]
924 );
925 assert_eq!(
926 &Join(merge_adjacent(vec![
927 Location::Range((1, Before(true)), (3, After(false))),
928 Location::Range((3, Before(false)), (5, After(true)))
929 ]))
930 .to_gb_format(),
931 "join(<2..>5)"
932 );
933 }
934
935 #[test]
936 fn pos_relocate_circular() {
937 let s = Seq {
938 seq: b"0123456789".to_vec(),
939 topology: Topology::Circular,
940 ..Seq::empty()
941 };
942 assert_eq!(
943 s.relocate_location(Location::single(5), -9).unwrap(),
944 Location::single(6)
945 );
946 let span1 = Location::simple_range(7, 10);
947 let span2 = Location::simple_range(0, 4);
948 assert_eq!(span1.to_gb_format(), String::from("8..10"));
949 let join = Location::Join(vec![span1, span2]);
950 assert_eq!(
951 s.relocate_location(join, -5).unwrap().to_gb_format(),
952 String::from("3..9")
953 );
954 }
955
956 #[test]
957 fn relocate_location() {
958 let s = Seq {
959 seq: b"0123456789".to_vec(),
960 topology: Topology::Circular,
961 ..Seq::empty()
962 };
963 assert_eq!(
964 &s.relocate_location(Location::simple_range(0, 10), 5)
965 .unwrap()
966 .to_gb_format(),
967 "join(6..10,1..5)"
968 );
969 assert_eq!(
970 &s.relocate_location(Location::simple_range(5, 8), 5)
971 .unwrap()
972 .to_gb_format(),
973 "1..3"
974 );
975 assert_eq!(
976 &s.relocate_location(
977 Location::Join(vec![
978 Location::simple_range(7, 10),
979 Location::simple_range(0, 3)
980 ]),
981 2
982 )
983 .unwrap()
984 .to_gb_format(),
985 "join(10,1..5)"
986 );
987 assert_eq!(
988 &s.relocate_location(
989 Location::Join(vec![
990 Location::simple_range(8, 10),
991 Location::simple_range(0, 2)
992 ]),
993 2
994 )
995 .unwrap()
996 .to_gb_format(),
997 "1..4"
998 );
999 assert_eq!(
1000 &s.relocate_location(Location::simple_range(0, 3), 5)
1001 .unwrap()
1002 .to_gb_format(),
1003 "6..8"
1004 );
1005 assert_eq!(
1006 s.relocate_location(Location::single(5), -9).unwrap(),
1007 Location::single(6)
1008 );
1009 let span1 = Location::simple_range(7, 10);
1010 let span2 = Location::simple_range(0, 4);
1011 let join = Location::Join(vec![span1, span2]);
1012 assert_eq!(
1013 &s.relocate_location(join, -5).unwrap().to_gb_format(),
1014 "3..9"
1015 );
1016 }
1017
1018 #[test]
1019 fn extract_range_seq() {
1020 let s = Seq {
1021 seq: b"0123456789".to_vec(),
1022 topology: Topology::Circular,
1023 ..Seq::empty()
1024 };
1025 assert_eq!(
1026 s.extract_range_seq(0, 10).into_owned(),
1027 b"0123456789".to_vec()
1028 );
1029 assert_eq!(
1030 s.extract_range_seq(0, 11).into_owned(),
1031 b"01234567890".to_vec()
1032 );
1033 assert_eq!(
1034 s.extract_range_seq(-1, 10).into_owned(),
1035 b"90123456789".to_vec()
1036 );
1037 assert_eq!(
1038 s.extract_range_seq(-1, 11).into_owned(),
1039 b"901234567890".to_vec()
1040 );
1041 assert_eq!(
1042 s.extract_range_seq(-1, 21).into_owned(),
1043 b"9012345678901234567890".to_vec()
1044 );
1045 }
1046
1047 #[test]
1048 fn test_extract_linear() {
1049 let features = vec![Feature {
1050 location: Location::simple_range(0, 100),
1051 kind: "".into(),
1052 qualifiers: Vec::new(),
1053 }];
1054 let s = Seq {
1055 seq: ::std::iter::repeat(b'A').take(100).collect(),
1056 topology: Topology::Linear,
1057 features,
1058 ..Seq::empty()
1059 };
1060 for i in 0..91 {
1061 for j in 1..11 {
1062 let res = s.extract_range(i, i + j);
1063 assert_eq!(res.features.len(), 1);
1064 assert_eq!(
1065 res.features[0].location,
1066 simplify(Location::simple_range(0, j)).unwrap()
1067 );
1068 }
1069 }
1070 }
1071
1072 #[test]
1073 fn test_extract_circular() {
1074 let whole_seq = Feature {
1075 location: Location::simple_range(0, 10),
1076 kind: "".into(),
1077 qualifiers: Vec::new(),
1078 };
1079 let make_pos = |from: i64, to: i64| -> Location {
1080 if to > 10 {
1081 Location::Join(vec![
1082 simplify(Location::simple_range(from, 10)).unwrap(),
1083 simplify(Location::simple_range(0, to - 10)).unwrap(),
1084 ])
1085 } else {
1086 simplify(Location::simple_range(from, to)).unwrap()
1087 }
1088 };
1089
1090 for i in 0..10 {
1091 for j in 1..3 {
1092 let s = Seq {
1093 seq: ::std::iter::repeat(b'A').take(10).collect(),
1094 topology: Topology::Circular,
1095 features: vec![
1096 whole_seq.clone(),
1097 Feature {
1098 location: make_pos(i, i + j),
1099 kind: "".into(),
1100 qualifiers: Vec::new(),
1101 },
1102 ],
1103 ..Seq::empty()
1104 };
1105 let res = s.extract_range(i, i + j);
1106 assert_eq!(res.features.len(), 2);
1107 if i < 8 {
1108 assert_eq!(
1109 res.features[0].location,
1110 simplify(Location::simple_range(0, j)).unwrap()
1111 );
1112 }
1113 println!("1 {:?}", res.features[0]);
1114 println!("{} {}", i, j);
1115 if i < 8 {
1116 assert_eq!(
1117 res.features[1].location,
1118 simplify(Location::simple_range(0, j)).unwrap()
1119 );
1120 }
1121 println!("2 {:?}", res.features[1]);
1122 }
1123 }
1124 }
1125
1126 #[test]
1127 fn extract_exclude_features() {
1128 let s = Seq {
1129 topology: Topology::Circular,
1130 seq: (0..10).collect(),
1131 features: vec![Feature {
1132 location: Location::simple_range(0, 4),
1133 kind: "".into(),
1134 qualifiers: vec![],
1135 }],
1136 ..Seq::empty()
1137 };
1138 let res = s.extract_range(4, 10);
1139 assert_eq!(res.features, vec![]);
1140 let res = s.extract_range(0, 1);
1141 assert_eq!(res.features.len(), 1);
1142 assert_eq!(&res.features[0].location, &Location::single(0));
1143 let res = s.extract_range(3, 4);
1144 assert_eq!(res.features.len(), 1);
1145 assert_eq!(&res.features[0].location, &Location::single(0));
1146 let res = s.extract_range(0, 10);
1147 assert_eq!(&res.features[0].location, &Location::simple_range(0, 4));
1148 }
1149
1150 #[test]
1151 fn truncate() {
1152 assert_eq!(
1153 Location::single(0).truncate(0, 1),
1154 Some(Location::single(0))
1155 );
1156 assert_eq!(Location::single(0).truncate(1, 2), None);
1157 assert_eq!(
1158 Location::simple_range(0, 3).truncate(1, 2),
1159 Some(Location::single(1))
1160 );
1161 assert_eq!(
1162 Location::simple_range(0, 2).truncate(0, 2),
1163 Some(Location::simple_range(0, 2))
1164 );
1165 assert_eq!(
1166 Location::Complement(Box::new(Location::simple_range(0, 2))).truncate(0, 2),
1167 Some(Location::Complement(Box::new(Location::simple_range(0, 2))))
1168 );
1169 assert_eq!(
1170 Location::Complement(Box::new(Location::simple_range(0, 2))).truncate(10, 20),
1171 None
1172 );
1173 assert_eq!(Location::simple_range(0, 2).truncate(3, 4), None);
1174 let p = Location::Join(vec![
1175 Location::simple_range(0, 3),
1176 Location::simple_range(4, 7),
1177 ]);
1178 assert_eq!(p.truncate(0, 3), Some(Location::simple_range(0, 3)));
1179 assert_eq!(p.truncate(10, 30), None);
1180 }
1181
1182 #[test]
1183 fn test_extract_circular_split() {
1184 let features = vec![Feature {
1185 location: Location::Join(vec![
1186 Location::simple_range(0, 2),
1187 Location::simple_range(4, 9),
1188 ]),
1189 kind: "".into(),
1190 qualifiers: Vec::new(),
1191 }];
1192 let s = Seq {
1193 seq: (0..10).collect(),
1194 topology: Topology::Circular,
1195 features,
1196 ..Seq::empty()
1197 };
1198
1199 for i in 0..10 {
1200 let res = s.extract_range(i, i + 10);
1201 println!("{:?}", res.features);
1202 }
1203 }
1204
1205 #[test]
1206 fn extract_range_wrapped_linear() {
1207 let features = vec![Feature {
1208 location: Location::Join(vec![
1209 Location::simple_range(5, 9),
1210 Location::simple_range(0, 4),
1211 ]),
1212 kind: "".into(),
1213 qualifiers: Vec::new(),
1214 }];
1215 let s = Seq {
1216 seq: (0..10).collect(),
1217 features,
1218 ..Seq::empty()
1219 };
1220 let res = s.extract_range(2, 8);
1221 assert_eq!(res.features.len(), 1);
1222 println!("{:?}", res.features[0].location);
1223 }
1224 #[test]
1225 fn set_origin() {
1226 init();
1227 let seq = Seq {
1228 seq: "0123456789".into(),
1229 features: vec![
1230 Feature {
1231 location: Location::simple_range(2, 7),
1232 kind: "".into(),
1233 qualifiers: Vec::new(),
1234 },
1235 Feature {
1236 location: Location::simple_range(0, 10),
1237 kind: "".into(),
1238 qualifiers: Vec::new(),
1239 },
1240 Feature {
1241 location: Location::Join(vec![
1242 Location::simple_range(7, 10),
1243 Location::simple_range(0, 4),
1244 ]),
1245 kind: "".into(),
1246 qualifiers: Vec::new(),
1247 },
1248 Feature {
1249 location: Location::single(0),
1250 kind: "".into(),
1251 qualifiers: Vec::new(),
1252 },
1253 ],
1254 topology: Topology::Circular,
1255 ..Seq::empty()
1256 };
1257 for i in 1..9 {
1258 println!("******** {}", i);
1259 let rotated = seq.set_origin(i);
1260 println!("rotated {:?}", rotated.features);
1261 let rotated2 = rotated.set_origin(10 - i);
1262 println!("rotated2: {:?}", rotated2.features);
1263 assert_eq!(rotated2.features, seq.features);
1264 }
1265 }
1266 #[test]
1267 fn range_to_location() {
1268 let s = Seq {
1269 seq: "0123456789".into(),
1270 topology: Topology::Linear,
1271 ..Seq::empty()
1272 };
1273 assert_eq!(s.range_to_location(0, 10), Location::simple_range(0, 10));
1274 let s = Seq {
1275 topology: Topology::Circular,
1276 ..s
1277 };
1278 assert_eq!(s.range_to_location(5, 10), Location::simple_range(5, 10));
1279 assert_eq!(s.range_to_location(5, 11).to_gb_format(), "join(6..10,1)");
1280 assert_eq!(
1281 s.range_to_location(5, 15).to_gb_format(),
1282 "join(6..10,1..5)"
1283 );
1284 }
1285 #[test]
1286 fn unwrap_range_linear() {
1287 let s = Seq {
1288 seq: "01".into(),
1289 ..Seq::empty()
1290 };
1291 assert_eq!(s.unwrap_range(0, 1), (0, 1));
1292 assert_eq!(s.unwrap_range(0, 2), (0, 2));
1293 }
1294
1295 #[test]
1296 #[should_panic]
1297 fn unwrap_range_linear_bad() {
1298 let s = Seq {
1299 seq: "01".into(),
1300 ..Seq::empty()
1301 };
1302 let _ = s.unwrap_range(0, 3);
1303 }
1304
1305 #[test]
1306 fn unwrap_range_circular() {
1307 let s = Seq {
1308 seq: "01".into(),
1309 topology: Topology::Circular,
1310 ..Seq::empty()
1311 };
1312 assert_eq!(s.unwrap_range(0, 1), (0, 1));
1313 assert_eq!(s.unwrap_range(0, 2), (0, 2));
1314 assert_eq!(s.unwrap_range(0, 3), (0, 3));
1315 assert_eq!(s.unwrap_range(-1, 0), (1, 2));
1316 assert_eq!(s.unwrap_range(1, 2), (1, 2));
1317 assert_eq!(s.unwrap_range(2, 3), (0, 1));
1318 assert_eq!(s.unwrap_range(-2, -1), (0, 1));
1319 }
1320
1321 #[test]
1322 fn extract_location() {
1323 let s = Seq {
1324 seq: (0..10).collect(),
1325 topology: Topology::Linear,
1326 ..Seq::empty()
1327 };
1328 let p = |l| Location::from_gb_format(l).unwrap();
1329 let e = |l| s.extract_location(&p(l)).unwrap();
1330 assert_eq!(e("1"), vec![0]);
1331 assert_eq!(e("2..6"), vec![1, 2, 3, 4, 5]);
1332 assert_eq!(e("join(complement(2..6),8)"), vec![5, 4, 3, 2, 1, 7]);
1333 assert_eq!(e("1..2"), vec![0, 1]);
1334 assert_eq!(e("complement(1..2)"), vec![1, 0]);
1335 assert_eq!(e("complement(join(8..10,1..2))"), vec![1, 0, 9, 8, 7]);
1336 assert_eq!(
1337 e("join(complement(1..2),complement(8..10))"),
1338 vec![1, 0, 9, 8, 7]
1339 );
1340 }
1341
1342 #[test]
1343 fn extract_location_external() {
1344 let s = Seq {
1345 seq: (0..10).collect(),
1346 ..Seq::empty()
1347 };
1348 let s_ext = Seq {
1349 seq: (10..20).collect(),
1350 ..Seq::empty()
1351 };
1352 let l = Location::from_gb_format("TEST:1..5").unwrap();
1353 assert_eq!(
1354 s.extract_location_with_fetcher(&l, |n| {
1355 assert_eq!(n, "TEST");
1356 Ok(&s_ext)
1357 })
1358 .unwrap(),
1359 vec![10, 11, 12, 13, 14]
1360 );
1361 }
1362
1363 #[test]
1364 fn wrap_location() {
1365 let s = Seq {
1366 seq: (0..10).collect(),
1367 topology: Topology::Circular,
1368 ..Seq::empty()
1369 };
1370 assert_eq!(
1371 Location::single(0),
1372 s.wrap_location(Location::single(0)).unwrap()
1373 );
1374 assert_eq!(
1375 Location::single(0),
1376 s.wrap_location(Location::single(10)).unwrap()
1377 );
1378 assert_eq!(
1379 Location::single(1),
1380 s.wrap_location(Location::single(11)).unwrap()
1381 );
1382 assert_eq!(
1383 Location::simple_range(0, 2),
1384 s.wrap_location(Location::simple_range(10, 12)).unwrap()
1385 );
1386 assert_eq!(
1387 Location::Join(vec![Location::single(9), Location::simple_range(0, 5)]),
1388 s.wrap_location(Location::simple_range(9, 15)).unwrap()
1389 );
1390 assert_eq!(
1391 &s.wrap_location(Location::Range((8, Before(false)), (11, After(true))))
1392 .unwrap()
1393 .to_gb_format(),
1394 "join(9..10,1..>1)"
1395 );
1396 }
1397
1398 #[test]
1399 fn revcomp() {
1400 let make_seq = |locations: Vec<Location>| Seq {
1401 seq: (0..10).collect(),
1402 topology: Topology::Linear,
1403 features: locations
1404 .into_iter()
1405 .map(|p| Feature {
1406 location: p,
1407 kind: "".into(),
1408 qualifiers: Vec::new(),
1409 })
1410 .collect(),
1411 ..Seq::empty()
1412 };
1413 let s = make_seq(vec![]);
1414 assert_eq!(
1415 s.revcomp_location(Location::single(0)).unwrap(),
1416 Location::Complement(Box::new(Location::single(9)))
1417 );
1418 assert_eq!(
1419 s.revcomp_location(Location::Between(0, 1)).unwrap(),
1420 Location::Complement(Box::new(Location::Between(8,9)))
1421 );
1422 assert_eq!(
1423 make_seq(vec![Location::simple_range(0, 2)])
1424 .revcomp()
1425 .features[0]
1426 .location,
1427 Location::Complement(Box::new(Location::simple_range(8, 10)))
1428 );
1429 assert_eq!(
1430 make_seq(vec![Location::Join(vec![
1431 Location::simple_range(0, 2),
1432 Location::simple_range(3, 5)
1433 ])])
1434 .revcomp()
1435 .features[0]
1436 .location,
1437 Location::Complement(Box::new(Location::Join(vec![
1438 Location::simple_range(5, 7),
1439 Location::simple_range(8, 10)
1440 ])))
1441 );
1442 assert_eq!(
1443 make_seq(vec![Location::single(9)]).revcomp().features[0].location,
1444 Location::Complement(Box::new(Location::single(0)))
1445 );
1446 }
1447 #[test]
1448 fn test_simplify() {
1449 let s = |a, b| {
1450 assert_eq!(
1451 simplify(parse_location(a).unwrap()).unwrap().to_gb_format(),
1452 b
1453 );
1454 };
1455 s(b"1..5", "1..5");
1456 s(b"join(1..2,2..5)", "join(1..2,2..5)");
1457 s(b"join(1..2,3..5)", "1..5");
1458 s(b"join(join(1..2,3..4),5)", "1..5");
1459 s(
1460 b"join(complement(1..2),complement(4..5))",
1461 "complement(join(4..5,1..2))",
1462 );
1463 }
1464}