grass_runtime/algorithm/
groupby.rs

1#![allow(unused)]
2
3use std::{
4    borrow::Borrow,
5    cell::{Cell, RefCell},
6    rc::Rc,
7};
8
9use crate::{
10    property::{Region, RegionCore},
11    record::ToSelfContained,
12    ChrRef,
13};
14
15pub struct GroupBuffer<K: ToOwned, T: 'static> {
16    key: <K as ToOwned>::Owned,
17    buffer: Vec<T>,
18    overlap: Cell<Option<Option<(ChrRef<'static>, u32, u32)>>>,
19    outline: Cell<Option<Option<(ChrRef<'static>, u32, u32)>>>,
20}
21
22impl<K: ToOwned, T: 'static + Region> GroupBuffer<K, T> {
23    pub fn overlap(self) -> GroupOverlap<K, T>
24    where
25        T: Region,
26    {
27        GroupOverlap(self)
28    }
29    fn compute_overlap(&self) {
30        let mut ret: Option<(_, u32, u32)> = None;
31        for region in self.buffer.iter() {
32            if let Some(cur) = ret {
33                if region.overlaps(&cur) {
34                    ret = None;
35                    break;
36                } else {
37                    ret = Some((
38                        region.chrom(),
39                        region.start().max(cur.1),
40                        region.end().min(cur.2),
41                    ));
42                }
43            } else {
44                ret = Some((region.chrom(), region.start(), region.end()))
45            }
46        }
47        self.overlap.set(Some(ret));
48    }
49    fn compute_outline(&self) {
50        let mut ret: Option<(_, u32, u32)> = None;
51        for region in self.buffer.iter() {
52            if let Some(cur) = ret {
53                if region.chrom() != cur.0 {
54                    ret = None;
55                    break;
56                } else {
57                    ret = Some((
58                        region.chrom(),
59                        region.start().min(cur.1),
60                        region.end().max(cur.2),
61                    ));
62                }
63            } else {
64                ret = Some((region.chrom(), region.start(), region.end()))
65            }
66        }
67        self.outline.set(Some(ret));
68    }
69    fn get_outline(&self) -> Option<(ChrRef<'static>, u32, u32)> {
70        if let Some(ret) = self.outline.clone().take() {
71            ret
72        } else {
73            self.compute_outline();
74            self.outline.clone().take().unwrap()
75        }
76    }
77    fn get_overlap(&self) -> Option<(ChrRef<'static>, u32, u32)> {
78        if let Some(ret) = self.overlap.clone().take() {
79            ret
80        } else {
81            self.compute_overlap();
82            self.overlap.clone().take().unwrap()
83        }
84    }
85}
86
87impl<K: ToOwned, T: 'static + Region> RegionCore for GroupBuffer<K, T> {
88    fn start(&self) -> u32 {
89        self.get_outline().map_or(0, |(_, s, _)| s)
90    }
91
92    fn end(&self) -> u32 {
93        self.get_outline().map_or(0, |(_, _, e)| e)
94    }
95
96    fn chrom(&self) -> crate::ChrRef<'static> {
97        self.get_outline().map_or(ChrRef::Dummy, |(c, _, _)| c)
98    }
99}
100pub struct GroupOverlap<K: ToOwned, T: 'static + Region>(GroupBuffer<K, T>);
101
102impl<K: ToOwned, T: 'static + Region> RegionCore for GroupOverlap<K, T> {
103    fn start(&self) -> u32 {
104        self.0.get_overlap().map_or(0, |(_, s, _)| s)
105    }
106
107    fn end(&self) -> u32 {
108        self.0.get_overlap().map_or(0, |(_, _, e)| e)
109    }
110
111    fn chrom(&self) -> crate::ChrRef<'static> {
112        self.0.get_overlap().map_or(ChrRef::Dummy, |(c, _, _)| c)
113    }
114}
115
116pub struct Groups<'a, K, I, F>
117where
118    K: ToOwned + PartialEq,
119    I: Iterator,
120    I::Item: ToSelfContained,
121{
122    inner: itertools::Groups<'a, K, I, F>,
123}
124
125impl<'a, K, I, F> Iterator for Groups<'a, K, I, F>
126where
127    K: ToOwned + PartialEq,
128    I: Iterator,
129    I::Item: ToSelfContained,
130    F: FnMut(&I::Item) -> K,
131{
132    type Item = GroupBuffer<K, <I::Item as ToSelfContained>::SelfContained>;
133    fn next(&mut self) -> Option<Self::Item> {
134        let (key, inner_group) = self.inner.next()?;
135        Some(GroupBuffer {
136            key: key.to_owned(),
137            buffer: inner_group.map(|item| item.to_self_contained()).collect(),
138            overlap: Cell::new(None),
139            outline: Cell::new(None),
140        })
141    }
142}
143
#[cfg(test)]
mod test {
    use crate::algorithm::{AssumeSorted, Components};
    use crate::record::Bed3;
    use crate::LineRecordStreamExt;

    /// Builds the component iterator over the bundled BED fixture. The
    /// grouping step itself is not exercised yet, so the test only verifies
    /// that the pipeline can be constructed.
    #[test]
    fn test_group_by() -> Result<(), Box<dyn std::error::Error>> {
        let raw = include_bytes!("../../../data/a.bed");
        let _components = raw
            .into_record_iter::<Bed3>()
            .assume_sorted()
            .components();
        // TODO: finish the grouping part of this test. The unfinished sketch
        // (the adaptor name `grou` was never completed) was:
        //
        //     let mut idx = 0;
        //     comp_iter.grou(move |comp| {
        //         if comp.depth() == 0 {
        //             idx += 1;
        //         }
        //         idx
        //     })
        Ok(())
    }
}