exon_gtf/
array_builder.rs

1// Copyright 2023 WHERE TRUE Technologies.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use arrow::{
18    array::{ArrayRef, Float32Builder, GenericStringBuilder, Int64Builder, MapBuilder},
19    error::ArrowError,
20};
21use noodles::gtf::Record;
22
23pub struct GTFArrayBuilder {
24    seqnames: GenericStringBuilder<i32>,
25    sources: GenericStringBuilder<i32>,
26    feature_types: GenericStringBuilder<i32>,
27    starts: Int64Builder,
28    ends: Int64Builder,
29    scores: Float32Builder,
30    strands: GenericStringBuilder<i32>,
31    frame: GenericStringBuilder<i32>,
32    attributes: MapBuilder<GenericStringBuilder<i32>, GenericStringBuilder<i32>>,
33
34    rows: usize,
35}
36
37impl Default for GTFArrayBuilder {
38    fn default() -> Self {
39        Self::new()
40    }
41}
42
43impl GTFArrayBuilder {
44    pub fn new() -> Self {
45        Self {
46            seqnames: GenericStringBuilder::<i32>::new(),
47            sources: GenericStringBuilder::<i32>::new(),
48            feature_types: GenericStringBuilder::<i32>::new(),
49            starts: Int64Builder::new(),
50            ends: Int64Builder::new(),
51            scores: Float32Builder::new(),
52            strands: GenericStringBuilder::<i32>::new(),
53            frame: GenericStringBuilder::<i32>::new(),
54            attributes: MapBuilder::new(
55                None,
56                GenericStringBuilder::<i32>::new(),
57                GenericStringBuilder::<i32>::new(),
58            ),
59            rows: 0,
60        }
61    }
62
63    pub fn len(&self) -> usize {
64        self.rows
65    }
66
67    pub fn is_empty(&self) -> bool {
68        self.rows == 0
69    }
70
71    pub fn append(&mut self, record: &Record) -> Result<(), ArrowError> {
72        self.seqnames.append_value(record.reference_sequence_name());
73        self.sources.append_value(record.source());
74        self.feature_types.append_value(record.ty());
75        self.starts.append_value(record.start().get() as i64);
76        self.ends.append_value(record.end().get() as i64);
77        self.scores.append_option(record.score());
78        self.strands.append_option(record.strand());
79        self.frame
80            .append_option(record.frame().map(|frame| frame.to_string()));
81
82        for entry in record.attributes().iter() {
83            self.attributes.keys().append_value(entry.key());
84            self.attributes.values().append_value(entry.value());
85        }
86
87        self.attributes.append(true)?;
88
89        self.rows += 1;
90
91        Ok(())
92    }
93
94    pub fn finish(&mut self) -> Vec<ArrayRef> {
95        let seqnames = self.seqnames.finish();
96        let sources = self.sources.finish();
97        let feature_types = self.feature_types.finish();
98        let starts = self.starts.finish();
99        let ends = self.ends.finish();
100        let scores = self.scores.finish();
101        let strands = self.strands.finish();
102        let frames = self.frame.finish();
103        let attributes = self.attributes.finish();
104
105        vec![
106            Arc::new(seqnames),
107            Arc::new(sources),
108            Arc::new(feature_types),
109            Arc::new(starts),
110            Arc::new(ends),
111            Arc::new(scores),
112            Arc::new(strands),
113            Arc::new(frames),
114            Arc::new(attributes),
115        ]
116    }
117}