exon_bed/
array_builder.rs

1// Copyright 2024 WHERE TRUE Technologies.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use arrow::{
18    array::{ArrayRef, GenericStringBuilder, Int64Builder},
19    datatypes::SchemaRef,
20};
21use exon_common::ExonArrayBuilder;
22
23use super::bed_record_builder::BEDRecord;
24
25pub struct BEDArrayBuilder {
26    reference_sequence_names: GenericStringBuilder<i32>,
27    starts: Int64Builder,
28    ends: Int64Builder,
29    names: GenericStringBuilder<i32>,
30    scores: Int64Builder,
31    strands: GenericStringBuilder<i32>,
32    thick_starts: Int64Builder,
33    thick_ends: Int64Builder,
34    colors: GenericStringBuilder<i32>,
35    block_counts: Int64Builder,
36    block_sizes: GenericStringBuilder<i32>,
37    block_starts: GenericStringBuilder<i32>,
38
39    projection: Vec<usize>,
40
41    rows: usize,
42}
43
44impl BEDArrayBuilder {
45    pub fn create(schema: SchemaRef, projection: Option<Vec<usize>>) -> Self {
46        let projection = match projection {
47            Some(p) => p,
48            None => (0..schema.fields().len()).collect(),
49        };
50
51        Self {
52            reference_sequence_names: GenericStringBuilder::<i32>::new(),
53            starts: Int64Builder::new(),
54            ends: Int64Builder::new(),
55            names: GenericStringBuilder::<i32>::new(),
56            scores: Int64Builder::new(),
57            strands: GenericStringBuilder::<i32>::new(),
58            thick_starts: Int64Builder::new(),
59            thick_ends: Int64Builder::new(),
60            colors: GenericStringBuilder::<i32>::new(),
61            block_counts: Int64Builder::new(),
62            block_sizes: GenericStringBuilder::<i32>::new(),
63            block_starts: GenericStringBuilder::<i32>::new(),
64            projection,
65            rows: 0,
66        }
67    }
68
69    pub fn append(&mut self, record: BEDRecord) -> std::io::Result<()> {
70        self.rows += 1;
71
72        for col_idx in self.projection.iter() {
73            match col_idx {
74                0 => self
75                    .reference_sequence_names
76                    .append_value(record.reference_sequence_name()),
77                1 => self.starts.append_value(record.start() as i64),
78                2 => self.ends.append_value(record.end() as i64),
79                3 => self.names.append_option(record.name()),
80                4 => self.scores.append_option(record.score()),
81                5 => self.strands.append_option(record.strand()),
82                6 => self
83                    .thick_starts
84                    .append_option(record.thick_start().map(|x| x as i64)),
85                7 => self
86                    .thick_ends
87                    .append_option(record.thick_end().map(|x| x as i64)),
88                8 => self.colors.append_option(record.color()),
89                9 => self
90                    .block_counts
91                    .append_option(record.block_count().map(|x| x as i64)),
92                10 => self.block_sizes.append_option(record.block_sizes()),
93                11 => self.block_starts.append_option(record.block_starts()),
94                _ => panic!("Invalid column index"),
95            }
96        }
97
98        Ok(())
99    }
100
101    pub fn finish(&mut self) -> Vec<ArrayRef> {
102        let mut arrays: Vec<ArrayRef> = vec![];
103
104        for col_idx in self.projection.iter() {
105            match col_idx {
106                0 => arrays.push(Arc::new(self.reference_sequence_names.finish())),
107                1 => arrays.push(Arc::new(self.starts.finish())),
108                2 => arrays.push(Arc::new(self.ends.finish())),
109                3 => arrays.push(Arc::new(self.names.finish())),
110                4 => arrays.push(Arc::new(self.scores.finish())),
111                5 => arrays.push(Arc::new(self.strands.finish())),
112                6 => arrays.push(Arc::new(self.thick_starts.finish())),
113                7 => arrays.push(Arc::new(self.thick_ends.finish())),
114                8 => arrays.push(Arc::new(self.colors.finish())),
115                9 => arrays.push(Arc::new(self.block_counts.finish())),
116                10 => arrays.push(Arc::new(self.block_sizes.finish())),
117                11 => arrays.push(Arc::new(self.block_starts.finish())),
118                _ => panic!("Invalid column index"),
119            }
120        }
121
122        arrays
123    }
124}
125
126impl ExonArrayBuilder for BEDArrayBuilder {
127    fn finish(&mut self) -> Vec<ArrayRef> {
128        self.finish()
129    }
130
131    fn len(&self) -> usize {
132        self.rows
133    }
134}