gb_io/
seq.rs

1use std::borrow::{Borrow, Cow};
2use std::cmp;
3use std::error::Error;
4use std::fmt;
5use std::io;
6use std::io::Write;
7use std::str;
8
9use crate::errors::GbParserError;
10use crate::reader::parse_location;
11use crate::dna::revcomp;
12
13#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
14#[derive(Debug, PartialEq, Clone)]
15/// A very simple Date struct so we don't have to rely on chrono
16pub struct Date {
17    year: i32,
18    month: u32,
19    day: u32,
20}
21
22#[derive(Debug, PartialEq, Clone)]
23pub struct DateError;
24
25impl Date {
26    /// Construct from a calendar date, checks that the numbers look
27    /// reasonable but nothing too exhaustive
28    pub fn from_ymd(year: i32, month: u32, day: u32) -> Result<Date, DateError> {
29        if (1..=12).contains(&month) && (1..=31).contains(&day) {
30            Ok(Date { year, month, day })
31        } else {
32            Err(DateError)
33        }
34    }
35    pub fn year(&self) -> i32 {
36        self.year
37    }
38    pub fn month(&self) -> u32 {
39        self.month
40    }
41    pub fn day(&self) -> u32 {
42        self.day
43    }
44}
45
46impl fmt::Display for Date {
47    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
48        let month = match self.month {
49            1 => "JAN",
50            2 => "FEB",
51            3 => "MAR",
52            4 => "APR",
53            5 => "MAY",
54            6 => "JUN",
55            7 => "JUL",
56            8 => "AUG",
57            9 => "SEP",
58            10 => "OCT",
59            11 => "NOV",
60            12 => "DEC",
61            _ => unreachable!(),
62        };
63        write!(f, "{:02}-{}-{:04}", self.day, month, self.year)
64    }
65}
66
67#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
68#[derive(Debug, PartialEq, Clone, Copy)]
69pub enum GapLength {
70    /// gap(n)
71    Known(i64),
72    /// gap()
73    Unknown,
74    /// gap(unk100)
75    Unk100,
76}
77
78#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
79#[derive(Debug, PartialEq, Clone, Copy)]
80pub struct Before(pub bool);
81#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
82#[derive(Debug, PartialEq, Clone, Copy)]
83pub struct After(pub bool);
84
85/// Represents a Genbank "location", used to specify the location of
86/// features and in the CONTIG line. See
87/// <http://www.insdc.org/files/feature_table.html> for a detailed
88/// description of what they mean.
89///
90/// Note that locations specified here must always refer to a
91/// nucleotide within the sequence. Ranges are inclusive in Genbank
92/// format, but represented as exclusive ranges, using 0-based
93/// indexing in this library. For example, `1..3` will be represented
94/// as `Range((0, Before(false)), (4, After(false)))`.
95///
96/// To specify a range that wraps around on a circular sequence,
97/// Genbank files use `join(x..last,1..y)`.
98#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
99#[derive(Debug, PartialEq, Clone)]
100pub enum Location {
101    /// Represents a range of positions, indicated with `[<]x..[>]y` in
102    /// the Genbank file. If `<` or `>` are present, then `Before` or
103    /// `After` will be true, respectively. This means that the
104    /// feature starts before/extends beyond the end point. Genbank
105    /// files represent locations consisting of a single position with
106    /// a single number. In this library this is represented using
107    /// `Range`, i.e. `1` becomes `Range((0, Before(false)), (1,
108    /// After(false)))`.
109    Range((i64, Before), (i64, After)),
110    /// Represented as `n^n+1`: This means that the location is
111    /// between the two _adjacent_ positions specified. On a circular
112    /// sequence the last and first positions are also allowed.
113    Between(i64, i64),
114    /// INSDC: "Find the complement of the presented sequence in the
115    /// span specified by "location" (i.e., read the complement of
116    /// the presented strand in its 5'-to-3' direction)"
117    Complement(Box<Location>),
118    /// INSDC: "The indicated elements should be joined (placed
119    /// end-to-end) to form one contiguous sequence"
120    Join(Vec<Location>),
121    /// INSDC: "The elements can be found in the specified order (5'
122    /// to 3' direction), but nothing is implied about the
123    /// reasonableness about joining them"
124    Order(Vec<Location>),
125    Bond(Vec<Location>),
126    OneOf(Vec<Location>),
127    External(String, Option<Box<Location>>),
128    Gap(GapLength),
129}
130
131#[derive(Debug, Error)]
132#[error("Not configured to fetch external sequences")]
133pub struct NoFetcherError;
134
135#[derive(Debug, Error)]
136pub enum LocationError {
137    #[error("Can't determine location due to ambiguity: {0}")]
138    Ambiguous(Location),
139    #[error("Can't resolve external location `{0}`: {1}")]
140    External(Location, #[source] Box<dyn Error>),
141    #[error("Recursion limit reached while processing: {0}")]
142    Recursion(Location),
143    // TODO: actually implement this
144    #[error("Empty location list encountered")]
145    Empty,
146    #[error("Location refers to a location outside of the sequence: {0}")]
147    OutOfBounds(Location),
148}
149
150impl Location {
151    /// Convenience constructor for this commonly used variant
152    pub fn simple_range(a: i64, b: i64) -> Location {
153        Location::Range((a, Before(false)), (b, After(false)))
154    }
155
156    pub fn single(a: i64) -> Location {
157        Location::simple_range(a, a + 1)
158    }
159
160    /// Try to get the start and end of a location. Returns the
161    /// starting and finishing locations, as an exclusive range.
162    pub fn find_bounds(&self) -> Result<(i64, i64), LocationError> {
163        use Location::*;
164        match *self {
165            Range((a, _), (b, _)) => Ok((a, b)),
166            Complement(ref location) => location.find_bounds(),
167            Join(ref locations) | Order(ref locations) => {
168                let first = locations.first().ok_or(LocationError::Empty)?;
169                let last = locations.last().unwrap();
170                let (start, _) = first.find_bounds()?;
171                let (_, end) = last.find_bounds()?;
172                Ok((start, end))
173            }
174            Between(a, b) => Ok((a, b + 1)),
175            | Bond(ref locations)
176            | OneOf(ref locations) => {
177                let (left, right): (Vec<_>, Vec<_>) = locations
178                    .iter()
179                    .flat_map(Location::find_bounds) // This skips any Err values
180                    .unzip();
181                let min = left.into_iter().min().ok_or(LocationError::Empty)?;
182                let max = right.into_iter().max().unwrap();
183                Ok((min, max))
184            }
185            ref p => Err(LocationError::Ambiguous(p.clone())),
186        }
187    }
188
189    // Only returns `Err` if one of the closures does
190    fn transform<L, V>(self, loc: &L, val: &V) -> Result<Location, LocationError>
191    where
192        L: Fn(Location) -> Result<Location, LocationError>,
193        V: Fn(i64) -> Result<i64, LocationError>,
194    {
195        loc(self)?.transform_impl(loc, val)
196    }
197
198    fn transform_impl<L, V>(self, loc: &L, val: &V) -> Result<Location, LocationError>
199    where
200        L: Fn(Location) -> Result<Location, LocationError>,
201        V: Fn(i64) -> Result<i64, LocationError>,
202    {
203        use Location::*;
204        let t_vec = |ps: Vec<Location>| {
205            ps.into_iter()
206                .map(|p| loc(p)?.transform_impl(loc, val))
207                .collect::<Result<Vec<_>, _>>()
208        };
209        let res = match self {
210            // Apply the location closure
211            Complement(p) => Complement(Box::new(loc(*p)?.transform_impl(loc, val)?)),
212            Order(ps) => Order(t_vec(ps)?),
213            Bond(ps) => Bond(t_vec(ps)?),
214            OneOf(ps) => OneOf(t_vec(ps)?),
215            Join(ps) => Join(t_vec(ps)?),
216            // Apply the value closure
217            Between(a, b) => Between(val(a)?, val(b)?),
218            Range((a, before), (b, after)) => Range((val(a)?, before), (val(b)?, after)),
219            // We don't touch values here
220            External(..) => self,
221            Gap(..) => self,
222        };
223        Ok(res)
224    }
225
226    /// Truncates this location, limiting it to the given bounds.
227    /// Note: `higher` is exclusive.
228    /// `None` is returned if no part of the location lies within the bounds.
229    pub fn truncate(&self, start: i64, end: i64) -> Option<Location> {
230        use Location::*;
231        let filter = |ps: &[Location]| {
232            let res: Vec<_> = ps.iter().filter_map(|p| p.truncate(start, end)).collect();
233            if res.is_empty() {
234                None
235            } else {
236                Some(res)
237            }
238        };
239        match *self {
240            Between(a, b) => {
241                if (a >= start && a < end) || (b >= start && b < end) {
242                    Some(self.clone())
243                } else {
244                    None
245                }
246            }
247            Range((a, before), (b, after)) => {
248                match (a >= start && a < end, b > start && b <= end) {
249                    (true, true) => Some(simplify_shallow(Range((a, before), (b, after))).unwrap()),
250                    (true, false) => {
251                        Some(simplify_shallow(Range((a, before), (end, After(false)))).unwrap())
252                    } // TODO: this should be true?
253                    (false, true) => {
254                        Some(simplify_shallow(Range((start, Before(false)), (b, after))).unwrap())
255                    } // TODO
256                    (false, false) => {
257                        // does the location span (start, end)?
258                        if a <= start && b >= end {
259                            Some(simplify_shallow(Location::simple_range(start, end)).unwrap()) // TODO
260                        } else {
261                            None
262                        }
263                    }
264                }
265            }
266            Complement(ref a) => a.truncate(start, end).map(|a| Complement(Box::new(a))),
267            Join(ref ps) => filter(ps).map(|v| simplify_shallow(Join(v)).unwrap()),
268            OneOf(ref ps) => filter(ps).map(OneOf),
269            Bond(ref ps) => filter(ps).map(Bond),
270            Order(ref ps) => filter(ps).map(Order),
271            External(_, _) | Gap(_) => Some(self.clone()),
272        }
273    }
274
275    fn complement(self) -> Location {
276        match self {
277            Location::Complement(x) => *x,
278            x => Location::Complement(Box::new(x)),
279        }
280    }
281
282    pub fn to_gb_format(&self) -> String {
283        fn location_list(locations: &[Location]) -> String {
284            locations
285                .iter()
286                .map(Location::to_gb_format)
287                .collect::<Vec<_>>()
288                .join(",")
289        }
290        match *self {
291            Location::Between(a, b) => format!("{}^{}", a + 1, b + 1),
292            Location::Range((a, Before(before)), (b, After(after))) => {
293                if !before && !after && b == a + 1 {
294                    format!("{}", a + 1)
295                } else {
296                let before = if before { "<" } else { "" };
297                let after = if after { ">" } else { "" };
298                format!("{}{}..{}{}", before, a + 1, after, b)
299                }
300            }
301            Location::Complement(ref p) => format!("complement({})", p.to_gb_format()),
302            Location::Join(ref locations) => format!("join({})", location_list(locations)),
303            Location::Order(ref locations) => format!("order({})", location_list(locations)),
304            Location::Bond(ref locations) => format!("bond({})", location_list(locations)),
305            Location::OneOf(ref locations) => format!("one-of({})", location_list(locations)),
306            Location::External(ref name, Some(ref location)) => {
307                format!("{}:{}", &name, location.to_gb_format())
308            }
309            Location::External(ref name, None) => format!("{}", name),
310            Location::Gap(GapLength::Known(length)) => format!("gap({})", length),
311            Location::Gap(GapLength::Unknown) => format!("gap()"),
312            Location::Gap(GapLength::Unk100) => format!("gap(unk100)"),
313        }
314    }
315
316    pub fn from_gb_format(s: &str) -> Result<Location, GbParserError> {
317        parse_location(s.as_bytes())
318    }
319}
320
321impl fmt::Display for Location {
322    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
323        write!(f, "{}", self.to_gb_format())
324    }
325}
326
327/// A qualifier such as `/gene="lacZ"`
328pub type Qualifier = (Cow<'static, str>, Option<String>);
329
330/// An annotation, or "feature". Note that one qualifier key can occur multiple
331/// times with distinct values. We store them in a `Vec` to preserve order. Some
332/// qualifiers have no value.
333#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
334#[derive(Debug, PartialEq, Clone)]
335pub struct Feature {
336    pub kind: Cow<'static, str>,
337    pub location: Location,
338    pub qualifiers: Vec<Qualifier>,
339}
340
341impl Feature {
342    /// Returns all the values for a given qualifier key. Qualifiers with no
343    /// value (ie. `/foo`) are ignored
344    pub fn qualifier_values<'a>(&'a self, key: &'a str) -> impl Iterator<Item = &'a str> {
345        self.qualifiers
346            .iter()
347            .filter(move |&(k, _)| k == key)
348            .filter_map(|(_, v)| v.as_ref().map(String::as_str))
349    }
350}
351
352#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
353#[derive(Debug, PartialEq, Clone)]
354pub enum Topology {
355    Linear,
356    Circular,
357}
358
359impl fmt::Display for Topology {
360    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
361        let res = match *self {
362            Topology::Linear => "linear",
363            Topology::Circular => "circular",
364        };
365        write!(f, "{}", res)
366    }
367}
368
369#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
370#[derive(Debug, PartialEq, Clone)]
371pub struct Source {
372    pub source: String,
373    pub organism: Option<String>,
374}
375
376#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
377#[derive(Debug, PartialEq, Clone)]
378pub struct Reference {
379    pub description: String,
380    pub authors: Option<String>,
381    pub consortium: Option<String>,
382    pub title: String,
383    pub journal: Option<String>,
384    pub pubmed: Option<String>,
385    pub remark: Option<String>,
386}
387
388/// Maximum length for which the buffer holding the sequence will be
389/// preallocated. This isn't a hard limit though... should it be?
390#[doc(hidden)]
391pub const REASONABLE_SEQ_LEN: usize = 500 * 1000 * 1000;
392
393#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
394#[derive(Debug, PartialEq, Clone)]
395pub struct Seq {
396    /// Name as specified in the LOCUS line
397    pub name: Option<String>,
398    /// Whether this molecule is linear or circular
399    pub topology: Topology,
400    /// The date specified in the LOCUS line
401    pub date: Option<Date>,
402    /// Length as specified in the LOCUS line. Note that this may differ from
403    /// `seq.len()`, especially if `contig` is `Some` and `seq` is empty. When
404    /// parsing a file, if sequence data is provided we check that this value is
405    /// equal to `seq.len()`
406    pub len: Option<usize>,
407    // TODO: should this be an option?
408    pub molecule_type: Option<String>,
409    pub division: String,
410    pub definition: Option<String>,
411    pub accession: Option<String>,
412    pub version: Option<String>,
413    pub source: Option<Source>,
414    pub dblink: Option<String>,
415    pub keywords: Option<String>,
416    pub references: Vec<Reference>,
417    pub comments: Vec<String>,
418    #[cfg_attr(feature = "serde", serde(with = "serde_bytes"))]
419    pub seq: Vec<u8>,
420    pub contig: Option<Location>,
421    pub features: Vec<Feature>,
422}
423
424impl Seq {
425    /// Create a new, empty `Seq`
426    pub fn empty() -> Seq {
427        Seq {
428            seq: vec![],
429            contig: None,
430            definition: None,
431            accession: None,
432            molecule_type: None,
433            division: String::from("UNK"),
434            dblink: None,
435            keywords: None,
436            source: None,
437            version: None,
438            comments: vec![],
439            references: vec![],
440            name: None,
441            topology: Topology::Linear,
442            date: None,
443            len: None,
444            features: vec![],
445        }
446    }
447
448    pub fn is_circular(&self) -> bool {
449        match self.topology {
450            Topology::Circular => true,
451            Topology::Linear => false,
452        }
453    }
454
455    /// Returns the "actual" length of the sequence. Note that this may not be
456    /// equal to self.seq.len(), in the following circumstances:
457    /// - `self.seq` is empty and `self.contig` is not, and the corresponding
458    ///   file's LOCUS line specified a length. The returned value is i64 to
459    ///   simplifiy arithmetic with coords from `Location`
460    pub fn len(&self) -> i64 {
461        if let Some(len) = self.len {
462            assert!(self.seq.is_empty() || len == self.seq.len());
463            len as i64
464        } else {
465            self.seq.len() as i64
466        }
467    }
468
469    /// "Normalises" an exclusive range on the chromosome, to make handling circular
470    /// sequences simpler. For linear sequences, it doesn't do anything except
471    /// panic unless `start` and `end` are both within the sequence, or if `end
472    /// <= start`. For circular sequences, all values, including negative values,
473    /// are allowed.
474    ///
475    /// If `end` <= `start`, the range is assumed to wrap around. The returned
476    /// values will satisfy the following conditions:
477    /// - `0 <= first < len`
478    /// - `first < last`
479    ///
480    /// This means that in the case of a range which wraps around, `last` >= `len`.
481    pub fn unwrap_range(&self, start: i64, end: i64) -> (i64, i64) {
482        let len = self.len();
483        match self.topology {
484            Topology::Linear => {
485                assert!(start <= end);
486                assert!(start >= 0 && start < len && end >= 0 && end <= len);
487                (start, end)
488            }
489            Topology::Circular => {
490                let mut start = start;
491                let mut end = end;
492
493                if start >= end {
494                    end += len;
495                }
496
497                while start >= len {
498                    start -= len;
499                    end -= len;
500                }
501
502                while start < 0 {
503                    start += len;
504                    end += len;
505                }
506
507                assert!(start >= 0 && start < len);
508                assert!(end > start);
509
510                (start, end)
511            }
512        }
513    }
514    /// Given a range on this sequence, returns a corresponding `Location`
515    /// Note: this is a rust-style, exclusive range
516    pub fn range_to_location(&self, start: i64, end: i64) -> Location {
517        let res = match self.topology {
518            Topology::Linear => {
519                assert!(end > start);
520                assert!(start < self.len() && end <= self.len());
521                Location::simple_range(start, end)
522            }
523            Topology::Circular => {
524                let (start, end) = self.unwrap_range(start, end);
525                if end > self.len() {
526                    assert!(end < self.len() * 2, "Range wraps around more than once!");
527                    Location::Join(vec![
528                        Location::simple_range(start, self.len()),
529                        Location::simple_range(0, end - self.len()),
530                    ])
531                } else {
532                    Location::simple_range(start, end)
533                }
534            }
535        };
536        simplify(res).expect("invalid Location not possible here")
537    }
538
539    /// "Wraps" a location on a circular sequence, so that coordinates that
540    /// extend beyond the end of the sequence are are wrapped to the origin.
541    pub fn wrap_location(&self, p: Location) -> Result<Location, LocationError> {
542        use Location::*;
543        let res = p
544            .transform(
545                &|p| {
546                    let res = match p {
547                        Range((mut a, before), (mut b, after)) => {
548                            while a >= self.len() {
549                                a -= self.len();
550                                b -= self.len();
551                            }
552                            if b <= self.len() {
553                                Range((a, before), (b, after))
554                            } else {
555                                Join(vec![
556                                    Range((a, before), (self.len(), After(false))),
557                                    Range((0, Before(false)), (b - self.len(), after)),
558                                ])
559                            }
560                        }
561                        p => p,
562                    };
563                    Ok(res)
564                },
565                &Ok,
566            )
567            .unwrap();
568        simplify(res)
569    }
570
571    /// "Shifts" a feature forwards by `shift` NTs (can be negative)
572    /// Note: If this fails you won't get the original `Feature`
573    /// back. If this is important, you should clone first
574    pub fn relocate_feature(&self, f: Feature, shift: i64) -> Result<Feature, LocationError> {
575        let res = Feature {
576            location: self.relocate_location(f.location, shift)?,
577            ..f
578        };
579        Ok(res)
580    }
581
582    /// "Shifts" a location forwards by `shift` NTs (can be negative)
583    /// Note: If this fails you won't get the original `Location`
584    /// back. If this is important, you should clone first
585    pub fn relocate_location(
586        &self,
587        p: Location,
588        mut shift: i64,
589    ) -> Result<Location, LocationError> {
590        if self.is_circular() {
591            while shift < 0 {
592                shift += self.len();
593            }
594            while shift >= self.len() {
595                shift -= self.len();
596            }
597            let moved = p.transform(&Ok, &|v| Ok(v + shift)).unwrap(); // can't fail
598            self.wrap_location(moved)
599        } else {
600            Ok(p.transform(&Ok, &|v| Ok(v + shift)).unwrap())
601        }
602    }
603
604    /// Used by `revcomp`
605    fn revcomp_location(&self, p: Location) -> Result<Location, LocationError> {
606        let p = p
607            .transform(
608                &|mut p| {
609                    match p {
610                        Location::Join(ref mut locations)
611                        | Location::Order(ref mut locations)
612                        | Location::Bond(ref mut locations)
613                        | Location::OneOf(ref mut locations) => {
614                            locations.reverse();
615                        }
616                        _ => (),
617                    };
618                    let p = match p {
619                        Location::Range((a, Before(before)), (b, After(after))) => {
620                            Location::Range((b, Before(after)), (a, After(before)))
621                        }
622                        Location::Between(a, b) => Location::Between(b + 1, a + 1),
623                        p => p,
624                    };
625                    Ok(p)
626                },
627                &|v| Ok(self.len() - v),
628            )
629            .unwrap(); // can't fail
630        simplify(p).map(Location::complement)
631    }
632
633    /// Note: If this fails you won't get the original `Feature`
634    /// back. If this is important, you should clone first
635    pub fn revcomp_feature(&self, f: Feature) -> Result<Feature, LocationError> {
636        Ok(Feature {
637            location: self.revcomp_location(f.location)?,
638            ..f
639        })
640    }
641
642    /// Returns the reverse complement of a `Seq`, skipping any features
643    /// which can't be processed with a warning
644    pub fn revcomp(&self) -> Seq {
645        let mut features = Vec::with_capacity(self.features.len());
646        for f in &self.features {
647            match self.revcomp_feature(f.clone()) {
648                Ok(f) => features.push(f),
649                Err(e) => warn!("Encountered invalid feature location: {}", e),
650            }
651        }
652        Seq {
653            features,
654            seq: revcomp(&self.seq),
655            ..self.clone()
656        }
657    }
658
659    /// Extracts just the sequence from `start` to `end`, taking into
660    /// account circularity. Note that `end` is exclusive. Use
661    /// this instead of `extract_range` if you don't need the
662    /// features.
663    pub fn extract_range_seq(&self, start: i64, end: i64) -> Cow<[u8]> {
664        // Here we use usize everywhere for convenience, since we will never
665        // have to deal with negative values
666        let len = self.len() as usize;
667        assert!(len == self.seq.len());
668        let (start, end) = self.unwrap_range(start, end);
669        let mut start = start as usize;
670        let mut end = end as usize;
671        if end <= len {
672            Cow::from(&self.seq[start..end])
673        } else {
674            assert!(self.is_circular());
675            let mut res = Vec::with_capacity(end - start);
676            while end != 0 {
677                let slice_end = cmp::min(end, len);
678                end -= slice_end;
679                res.extend(&self.seq[start..slice_end]);
680                start = 0;
681            }
682            Cow::from(res)
683        }
684    }
685
686    /// Extracts from `start` to `end`, truncating features that
687    /// extend beyond this range.  Note that `end` is not
688    /// inclusive. Skips ambiguous features with a warning.
689    pub fn extract_range(&self, start: i64, end: i64) -> Seq {
690        let (start, end) = self.unwrap_range(start, end);
691        let mut shift = -start;
692        if self.is_circular() {
693            while shift < 0 {
694                shift += self.len();
695            }
696            while shift > self.len() {
697                shift -= self.len();
698            }
699        }
700        let features = self
701            .features
702            .iter()
703            .flat_map(
704                |f| match self.relocate_location(f.location.clone(), shift) {
705                    // let `truncate` filter locations outside the range
706                    Ok(l) => l.truncate(0, end - start).map(|location| Feature {
707                        location,
708                        ..f.clone()
709                    }),
710                    Err(e) => {
711                        warn!("Skipping feature, can't process invalid location: {}", e);
712                        None
713                    }
714                },
715            )
716            .collect();
717        Seq {
718            features,
719            seq: self.extract_range_seq(start, end).into(),
720            ..Seq::empty()
721        }
722    }
723
724    /// Extract the sequence specified by `l`. This version returns
725    /// `Err(LocationError::External(_, NoFetcherError))` if it
726    /// encounters a reference to an external sequence.
727    ///
728    /// See `extract_location_with_fetcher` for details
729    pub fn extract_location(&self, l: &Location) -> Result<Vec<u8>, LocationError> {
730        self.extract_location_with_fetcher(l, |_| Err::<Seq, _>(Box::new(NoFetcherError)))
731    }
732
733    /// Extract the sequence specified by `l`. If the location
734    /// references an external sequence, `ext_fetcher` will be called
735    /// with the name of this sequence to retrieve it. Since an external
736    /// feature may be referenced multiple times, it might be best to
737    /// return `&Seq` or similar from `ext_fetcher`.
738    pub fn extract_location_with_fetcher<'a, F, S>(
739        &self,
740        l: &Location,
741        mut ext_fetcher: F,
742    ) -> Result<Vec<u8>, LocationError>
743    where
744        F: (FnMut(&str) -> Result<S, Box<dyn Error>>) + 'a,
745        S: Borrow<Seq> + 'a,
746    {
747        self.extract_location_impl(l, &mut ext_fetcher)
748    }
749
750    fn extract_location_impl<'a, F, S>(
751        &self,
752        l: &Location,
753        ext_fetcher: &mut F,
754    ) -> Result<Vec<u8>, LocationError>
755    where
756        F: (FnMut(&str) -> Result<S, Box<dyn Error>>) + 'a,
757        S: Borrow<Seq> + 'a,
758    {
759        let get_range = |from: i64, to: i64| -> Result<&[u8], LocationError> {
760            let usize_or = |a: i64| -> Result<usize, LocationError> {
761                if a < 0 {
762                    Err(LocationError::OutOfBounds(l.clone()))
763                } else {
764                    Ok(a as usize)
765                }
766            };
767            let s = self
768                .seq
769                .get(usize_or(from)?..usize_or(to)?)
770                .ok_or_else(|| LocationError::OutOfBounds(l.clone()))?;
771            Ok(s)
772        };
773        use Location::*;
774        let res = match *l {
775            Range((a, _), (b, _)) => get_range(a, b)?.into(),
776            Join(ref ls) => {
777                let mut res = Vec::new();
778                for l in ls {
779                    res.extend_from_slice(&self.extract_location_impl(l, ext_fetcher)?);
780                }
781                res
782            }
783            Complement(ref l) => revcomp(&self.extract_location_impl(l, ext_fetcher)?),
784            External(ref name, ref ext_l) => {
785                let ext_seq =
786                    ext_fetcher(name).map_err(|e| LocationError::External(l.clone(), e))?;
787                if let Some(ext_l) = ext_l {
788                    ext_seq.borrow().extract_location_impl(ext_l, ext_fetcher)?
789                } else {
790                    ext_seq.borrow().seq.clone()
791                }
792            }
793            _ => return Err(LocationError::Ambiguous(l.clone())),
794        };
795        Ok(res)
796    }
797
798    /// Returns a new `Seq`, rotated so that `origin` is at the start
799    pub fn set_origin(&self, origin: i64) -> Seq {
800        assert!(self.is_circular());
801        assert!(origin < self.len());
802        let rotated = self.extract_range_seq(origin, origin);
803        Seq {
804            seq: rotated.into(),
805            features: self
806                .features
807                .iter()
808                .cloned()
809                .flat_map(|f| self.relocate_feature(f, -origin))
810                .collect(),
811            ..self.clone()
812        }
813    }
814
815    pub fn write<T: Write>(&self, file: T) -> io::Result<()> {
816        crate::writer::write(file, self)
817    }
818}
819
820//TODO: should we merge adjacent locations when Before/After is set?
821fn merge_adjacent(ps: Vec<Location>) -> Vec<Location> {
822    use Location::*;
823    let mut res: Vec<Location> = Vec::with_capacity(ps.len());
824    for p in ps {
825        if let Some(last) = res.last_mut() {
826            match (&last, p) {
827                (Range(a, (ref b, After(false))), Range((c, Before(false)), d)) => {
828                    if *b == c {
829                        *last = Range(*a, d);
830                    } else {
831                        res.push(Range((c, Before(false)), d));
832                    }
833                }
834                (_, p) => res.push(p),
835            }
836        } else {
837            res.push(p);
838        }
839    }
840    res
841}
842
843fn flatten_join(v: Vec<Location>) -> Vec<Location> {
844    let mut res = Vec::with_capacity(v.len());
845    for p in v {
846        match p {
847            Location::Join(xs) => res.extend(flatten_join(xs)),
848            p => res.push(p),
849        }
850    }
851    res
852}
853
854/// This doesn't simplify everything yet...
855/// TODO: return original Location somehow on failure
856fn simplify(p: Location) -> Result<Location, LocationError> {
857    p.transform(&simplify_shallow, &Ok)
858}
859
860fn simplify_shallow(p: Location) -> Result<Location, LocationError> {
861    use Location::*;
862    match p {
863        Join(xs) => {
864            if xs.is_empty() {
865                return Err(LocationError::Empty);
866            }
867            let xs = flatten_join(xs);
868            let mut xs = merge_adjacent(xs);
869            assert!(!xs.is_empty());
870            if xs.len() == 1 {
871                // remove the join, we now have a new type of location
872                // so we need to simplify again
873                Ok(simplify_shallow(xs.pop().unwrap())?)
874            } else {
875                //if everything is 'complement', reverse the order and move it outside
876                if xs
877                    .iter()
878                    .all(|x| matches!(x, Complement(_)))
879                {
880                    xs = xs.into_iter().rev().map(Location::complement).collect();
881                    Ok(Join(xs).complement())
882                } else {
883                    Ok(Join(xs))
884                }
885            }
886        }
887        p => Ok(p),
888    }
889}
890
891#[cfg(test)]
892mod test {
893    use super::*;
894    use crate::tests::init;
895
896    #[test]
897    fn test_merge_adj() {
898        use Location::*;
899        assert_eq!(
900            merge_adjacent(vec![
901                Location::simple_range(0, 4),
902                Location::simple_range(5, 8),
903            ]),
904            vec![Location::simple_range(0, 4), Location::simple_range(5, 8)]
905        );
906        assert_eq!(
907            merge_adjacent(vec![
908                Location::simple_range(0, 5),
909                Location::simple_range(5, 8),
910            ]),
911            vec![Location::simple_range(0, 8)]
912        );
913        assert_eq!(
914            merge_adjacent(vec![Location::simple_range(0, 5), Location::single(5)]),
915            vec![Location::simple_range(0, 6)]
916        );
917        assert_eq!(
918            merge_adjacent(vec![Location::single(0), Location::simple_range(1, 5)]),
919            vec![Location::simple_range(0, 5)]
920        );
921        assert_eq!(
922            merge_adjacent(vec![Location::single(0), Location::single(1)]),
923            vec![Location::simple_range(0, 2)]
924        );
925        assert_eq!(
926            &Join(merge_adjacent(vec![
927                Location::Range((1, Before(true)), (3, After(false))),
928                Location::Range((3, Before(false)), (5, After(true)))
929            ]))
930            .to_gb_format(),
931            "join(<2..>5)"
932        );
933    }
934
935    #[test]
936    fn pos_relocate_circular() {
937        let s = Seq {
938            seq: b"0123456789".to_vec(),
939            topology: Topology::Circular,
940            ..Seq::empty()
941        };
942        assert_eq!(
943            s.relocate_location(Location::single(5), -9).unwrap(),
944            Location::single(6)
945        );
946        let span1 = Location::simple_range(7, 10);
947        let span2 = Location::simple_range(0, 4);
948        assert_eq!(span1.to_gb_format(), String::from("8..10"));
949        let join = Location::Join(vec![span1, span2]);
950        assert_eq!(
951            s.relocate_location(join, -5).unwrap().to_gb_format(),
952            String::from("3..9")
953        );
954    }
955
956    #[test]
957    fn relocate_location() {
958        let s = Seq {
959            seq: b"0123456789".to_vec(),
960            topology: Topology::Circular,
961            ..Seq::empty()
962        };
963        assert_eq!(
964            &s.relocate_location(Location::simple_range(0, 10), 5)
965                .unwrap()
966                .to_gb_format(),
967            "join(6..10,1..5)"
968        );
969        assert_eq!(
970            &s.relocate_location(Location::simple_range(5, 8), 5)
971                .unwrap()
972                .to_gb_format(),
973            "1..3"
974        );
975        assert_eq!(
976            &s.relocate_location(
977                Location::Join(vec![
978                    Location::simple_range(7, 10),
979                    Location::simple_range(0, 3)
980                ]),
981                2
982            )
983            .unwrap()
984            .to_gb_format(),
985            "join(10,1..5)"
986        );
987        assert_eq!(
988            &s.relocate_location(
989                Location::Join(vec![
990                    Location::simple_range(8, 10),
991                    Location::simple_range(0, 2)
992                ]),
993                2
994            )
995            .unwrap()
996            .to_gb_format(),
997            "1..4"
998        );
999        assert_eq!(
1000            &s.relocate_location(Location::simple_range(0, 3), 5)
1001                .unwrap()
1002                .to_gb_format(),
1003            "6..8"
1004        );
1005        assert_eq!(
1006            s.relocate_location(Location::single(5), -9).unwrap(),
1007            Location::single(6)
1008        );
1009        let span1 = Location::simple_range(7, 10);
1010        let span2 = Location::simple_range(0, 4);
1011        let join = Location::Join(vec![span1, span2]);
1012        assert_eq!(
1013            &s.relocate_location(join, -5).unwrap().to_gb_format(),
1014            "3..9"
1015        );
1016    }
1017
1018    #[test]
1019    fn extract_range_seq() {
1020        let s = Seq {
1021            seq: b"0123456789".to_vec(),
1022            topology: Topology::Circular,
1023            ..Seq::empty()
1024        };
1025        assert_eq!(
1026            s.extract_range_seq(0, 10).into_owned(),
1027            b"0123456789".to_vec()
1028        );
1029        assert_eq!(
1030            s.extract_range_seq(0, 11).into_owned(),
1031            b"01234567890".to_vec()
1032        );
1033        assert_eq!(
1034            s.extract_range_seq(-1, 10).into_owned(),
1035            b"90123456789".to_vec()
1036        );
1037        assert_eq!(
1038            s.extract_range_seq(-1, 11).into_owned(),
1039            b"901234567890".to_vec()
1040        );
1041        assert_eq!(
1042            s.extract_range_seq(-1, 21).into_owned(),
1043            b"9012345678901234567890".to_vec()
1044        );
1045    }
1046
1047    #[test]
1048    fn test_extract_linear() {
1049        let features = vec![Feature {
1050            location: Location::simple_range(0, 100),
1051            kind: "".into(),
1052            qualifiers: Vec::new(),
1053        }];
1054        let s = Seq {
1055            seq: ::std::iter::repeat(b'A').take(100).collect(),
1056            topology: Topology::Linear,
1057            features,
1058            ..Seq::empty()
1059        };
1060        for i in 0..91 {
1061            for j in 1..11 {
1062                let res = s.extract_range(i, i + j);
1063                assert_eq!(res.features.len(), 1);
1064                assert_eq!(
1065                    res.features[0].location,
1066                    simplify(Location::simple_range(0, j)).unwrap()
1067                );
1068            }
1069        }
1070    }
1071
1072    #[test]
1073    fn test_extract_circular() {
1074        let whole_seq = Feature {
1075            location: Location::simple_range(0, 10),
1076            kind: "".into(),
1077            qualifiers: Vec::new(),
1078        };
1079        let make_pos = |from: i64, to: i64| -> Location {
1080            if to > 10 {
1081                Location::Join(vec![
1082                    simplify(Location::simple_range(from, 10)).unwrap(),
1083                    simplify(Location::simple_range(0, to - 10)).unwrap(),
1084                ])
1085            } else {
1086                simplify(Location::simple_range(from, to)).unwrap()
1087            }
1088        };
1089
1090        for i in 0..10 {
1091            for j in 1..3 {
1092                let s = Seq {
1093                    seq: ::std::iter::repeat(b'A').take(10).collect(),
1094                    topology: Topology::Circular,
1095                    features: vec![
1096                        whole_seq.clone(),
1097                        Feature {
1098                            location: make_pos(i, i + j),
1099                            kind: "".into(),
1100                            qualifiers: Vec::new(),
1101                        },
1102                    ],
1103                    ..Seq::empty()
1104                };
1105                let res = s.extract_range(i, i + j);
1106                assert_eq!(res.features.len(), 2);
1107                if i < 8 {
1108                    assert_eq!(
1109                        res.features[0].location,
1110                        simplify(Location::simple_range(0, j)).unwrap()
1111                    );
1112                }
1113                println!("1 {:?}", res.features[0]);
1114                println!("{} {}", i, j);
1115                if i < 8 {
1116                    assert_eq!(
1117                        res.features[1].location,
1118                        simplify(Location::simple_range(0, j)).unwrap()
1119                    );
1120                }
1121                println!("2 {:?}", res.features[1]);
1122            }
1123        }
1124    }
1125
1126    #[test]
1127    fn extract_exclude_features() {
1128        let s = Seq {
1129            topology: Topology::Circular,
1130            seq: (0..10).collect(),
1131            features: vec![Feature {
1132                location: Location::simple_range(0, 4),
1133                kind: "".into(),
1134                qualifiers: vec![],
1135            }],
1136            ..Seq::empty()
1137        };
1138        let res = s.extract_range(4, 10);
1139        assert_eq!(res.features, vec![]);
1140        let res = s.extract_range(0, 1);
1141        assert_eq!(res.features.len(), 1);
1142        assert_eq!(&res.features[0].location, &Location::single(0));
1143        let res = s.extract_range(3, 4);
1144        assert_eq!(res.features.len(), 1);
1145        assert_eq!(&res.features[0].location, &Location::single(0));
1146        let res = s.extract_range(0, 10);
1147        assert_eq!(&res.features[0].location, &Location::simple_range(0, 4));
1148    }
1149
1150    #[test]
1151    fn truncate() {
1152        assert_eq!(
1153            Location::single(0).truncate(0, 1),
1154            Some(Location::single(0))
1155        );
1156        assert_eq!(Location::single(0).truncate(1, 2), None);
1157        assert_eq!(
1158            Location::simple_range(0, 3).truncate(1, 2),
1159            Some(Location::single(1))
1160        );
1161        assert_eq!(
1162            Location::simple_range(0, 2).truncate(0, 2),
1163            Some(Location::simple_range(0, 2))
1164        );
1165        assert_eq!(
1166            Location::Complement(Box::new(Location::simple_range(0, 2))).truncate(0, 2),
1167            Some(Location::Complement(Box::new(Location::simple_range(0, 2))))
1168        );
1169        assert_eq!(
1170            Location::Complement(Box::new(Location::simple_range(0, 2))).truncate(10, 20),
1171            None
1172        );
1173        assert_eq!(Location::simple_range(0, 2).truncate(3, 4), None);
1174        let p = Location::Join(vec![
1175            Location::simple_range(0, 3),
1176            Location::simple_range(4, 7),
1177        ]);
1178        assert_eq!(p.truncate(0, 3), Some(Location::simple_range(0, 3)));
1179        assert_eq!(p.truncate(10, 30), None);
1180    }
1181
1182    #[test]
1183    fn test_extract_circular_split() {
1184        let features = vec![Feature {
1185            location: Location::Join(vec![
1186                Location::simple_range(0, 2),
1187                Location::simple_range(4, 9),
1188            ]),
1189            kind: "".into(),
1190            qualifiers: Vec::new(),
1191        }];
1192        let s = Seq {
1193            seq: (0..10).collect(),
1194            topology: Topology::Circular,
1195            features,
1196            ..Seq::empty()
1197        };
1198
1199        for i in 0..10 {
1200            let res = s.extract_range(i, i + 10);
1201            println!("{:?}", res.features);
1202        }
1203    }
1204
1205    #[test]
1206    fn extract_range_wrapped_linear() {
1207        let features = vec![Feature {
1208            location: Location::Join(vec![
1209                Location::simple_range(5, 9),
1210                Location::simple_range(0, 4),
1211            ]),
1212            kind: "".into(),
1213            qualifiers: Vec::new(),
1214        }];
1215        let s = Seq {
1216            seq: (0..10).collect(),
1217            features,
1218            ..Seq::empty()
1219        };
1220        let res = s.extract_range(2, 8);
1221        assert_eq!(res.features.len(), 1);
1222        println!("{:?}", res.features[0].location);
1223    }
1224    #[test]
1225    fn set_origin() {
1226        init();
1227        let seq = Seq {
1228            seq: "0123456789".into(),
1229            features: vec![
1230                Feature {
1231                    location: Location::simple_range(2, 7),
1232                    kind: "".into(),
1233                    qualifiers: Vec::new(),
1234                },
1235                Feature {
1236                    location: Location::simple_range(0, 10),
1237                    kind: "".into(),
1238                    qualifiers: Vec::new(),
1239                },
1240                Feature {
1241                    location: Location::Join(vec![
1242                        Location::simple_range(7, 10),
1243                        Location::simple_range(0, 4),
1244                    ]),
1245                    kind: "".into(),
1246                    qualifiers: Vec::new(),
1247                },
1248                Feature {
1249                    location: Location::single(0),
1250                    kind: "".into(),
1251                    qualifiers: Vec::new(),
1252                },
1253            ],
1254            topology: Topology::Circular,
1255            ..Seq::empty()
1256        };
1257        for i in 1..9 {
1258            println!("******** {}", i);
1259            let rotated = seq.set_origin(i);
1260            println!("rotated {:?}", rotated.features);
1261            let rotated2 = rotated.set_origin(10 - i);
1262            println!("rotated2: {:?}", rotated2.features);
1263            assert_eq!(rotated2.features, seq.features);
1264        }
1265    }
1266    #[test]
1267    fn range_to_location() {
1268        let s = Seq {
1269            seq: "0123456789".into(),
1270            topology: Topology::Linear,
1271            ..Seq::empty()
1272        };
1273        assert_eq!(s.range_to_location(0, 10), Location::simple_range(0, 10));
1274        let s = Seq {
1275            topology: Topology::Circular,
1276            ..s
1277        };
1278        assert_eq!(s.range_to_location(5, 10), Location::simple_range(5, 10));
1279        assert_eq!(s.range_to_location(5, 11).to_gb_format(), "join(6..10,1)");
1280        assert_eq!(
1281            s.range_to_location(5, 15).to_gb_format(),
1282            "join(6..10,1..5)"
1283        );
1284    }
1285    #[test]
1286    fn unwrap_range_linear() {
1287        let s = Seq {
1288            seq: "01".into(),
1289            ..Seq::empty()
1290        };
1291        assert_eq!(s.unwrap_range(0, 1), (0, 1));
1292        assert_eq!(s.unwrap_range(0, 2), (0, 2));
1293    }
1294
1295    #[test]
1296    #[should_panic]
1297    fn unwrap_range_linear_bad() {
1298        let s = Seq {
1299            seq: "01".into(),
1300            ..Seq::empty()
1301        };
1302        let _ = s.unwrap_range(0, 3);
1303    }
1304
1305    #[test]
1306    fn unwrap_range_circular() {
1307        let s = Seq {
1308            seq: "01".into(),
1309            topology: Topology::Circular,
1310            ..Seq::empty()
1311        };
1312        assert_eq!(s.unwrap_range(0, 1), (0, 1));
1313        assert_eq!(s.unwrap_range(0, 2), (0, 2));
1314        assert_eq!(s.unwrap_range(0, 3), (0, 3));
1315        assert_eq!(s.unwrap_range(-1, 0), (1, 2));
1316        assert_eq!(s.unwrap_range(1, 2), (1, 2));
1317        assert_eq!(s.unwrap_range(2, 3), (0, 1));
1318        assert_eq!(s.unwrap_range(-2, -1), (0, 1));
1319    }
1320
1321    #[test]
1322    fn extract_location() {
1323        let s = Seq {
1324            seq: (0..10).collect(),
1325            topology: Topology::Linear,
1326            ..Seq::empty()
1327        };
1328        let p = |l| Location::from_gb_format(l).unwrap();
1329        let e = |l| s.extract_location(&p(l)).unwrap();
1330        assert_eq!(e("1"), vec![0]);
1331        assert_eq!(e("2..6"), vec![1, 2, 3, 4, 5]);
1332        assert_eq!(e("join(complement(2..6),8)"), vec![5, 4, 3, 2, 1, 7]);
1333        assert_eq!(e("1..2"), vec![0, 1]);
1334        assert_eq!(e("complement(1..2)"), vec![1, 0]);
1335        assert_eq!(e("complement(join(8..10,1..2))"), vec![1, 0, 9, 8, 7]);
1336        assert_eq!(
1337            e("join(complement(1..2),complement(8..10))"),
1338            vec![1, 0, 9, 8, 7]
1339        );
1340    }
1341
1342    #[test]
1343    fn extract_location_external() {
1344        let s = Seq {
1345            seq: (0..10).collect(),
1346            ..Seq::empty()
1347        };
1348        let s_ext = Seq {
1349            seq: (10..20).collect(),
1350            ..Seq::empty()
1351        };
1352        let l = Location::from_gb_format("TEST:1..5").unwrap();
1353        assert_eq!(
1354            s.extract_location_with_fetcher(&l, |n| {
1355                assert_eq!(n, "TEST");
1356                Ok(&s_ext)
1357            })
1358            .unwrap(),
1359            vec![10, 11, 12, 13, 14]
1360        );
1361    }
1362
1363    #[test]
1364    fn wrap_location() {
1365        let s = Seq {
1366            seq: (0..10).collect(),
1367            topology: Topology::Circular,
1368            ..Seq::empty()
1369        };
1370        assert_eq!(
1371            Location::single(0),
1372            s.wrap_location(Location::single(0)).unwrap()
1373        );
1374        assert_eq!(
1375            Location::single(0),
1376            s.wrap_location(Location::single(10)).unwrap()
1377        );
1378        assert_eq!(
1379            Location::single(1),
1380            s.wrap_location(Location::single(11)).unwrap()
1381        );
1382        assert_eq!(
1383            Location::simple_range(0, 2),
1384            s.wrap_location(Location::simple_range(10, 12)).unwrap()
1385        );
1386        assert_eq!(
1387            Location::Join(vec![Location::single(9), Location::simple_range(0, 5)]),
1388            s.wrap_location(Location::simple_range(9, 15)).unwrap()
1389        );
1390        assert_eq!(
1391            &s.wrap_location(Location::Range((8, Before(false)), (11, After(true))))
1392                .unwrap()
1393                .to_gb_format(),
1394            "join(9..10,1..>1)"
1395        );
1396    }
1397
1398    #[test]
1399    fn revcomp() {
1400        let make_seq = |locations: Vec<Location>| Seq {
1401            seq: (0..10).collect(),
1402            topology: Topology::Linear,
1403            features: locations
1404                .into_iter()
1405                .map(|p| Feature {
1406                    location: p,
1407                    kind: "".into(),
1408                    qualifiers: Vec::new(),
1409                })
1410                .collect(),
1411            ..Seq::empty()
1412        };
1413        let s = make_seq(vec![]);
1414        assert_eq!(
1415            s.revcomp_location(Location::single(0)).unwrap(),
1416            Location::Complement(Box::new(Location::single(9)))
1417        );
1418        assert_eq!(
1419            s.revcomp_location(Location::Between(0, 1)).unwrap(),
1420            Location::Complement(Box::new(Location::Between(8,9)))
1421        );
1422        assert_eq!(
1423            make_seq(vec![Location::simple_range(0, 2)])
1424                .revcomp()
1425                .features[0]
1426                .location,
1427            Location::Complement(Box::new(Location::simple_range(8, 10)))
1428        );
1429        assert_eq!(
1430            make_seq(vec![Location::Join(vec![
1431                Location::simple_range(0, 2),
1432                Location::simple_range(3, 5)
1433            ])])
1434            .revcomp()
1435            .features[0]
1436                .location,
1437            Location::Complement(Box::new(Location::Join(vec![
1438                Location::simple_range(5, 7),
1439                Location::simple_range(8, 10)
1440            ])))
1441        );
1442        assert_eq!(
1443            make_seq(vec![Location::single(9)]).revcomp().features[0].location,
1444            Location::Complement(Box::new(Location::single(0)))
1445        );
1446    }
1447    #[test]
1448    fn test_simplify() {
1449        let s = |a, b| {
1450            assert_eq!(
1451                simplify(parse_location(a).unwrap()).unwrap().to_gb_format(),
1452                b
1453            );
1454        };
1455        s(b"1..5", "1..5");
1456        s(b"join(1..2,2..5)", "join(1..2,2..5)");
1457        s(b"join(1..2,3..5)", "1..5");
1458        s(b"join(join(1..2,3..4),5)", "1..5");
1459        s(
1460            b"join(complement(1..2),complement(4..5))",
1461            "complement(join(4..5,1..2))",
1462        );
1463    }
1464}