exon_bed/
array_builder.rs1use std::sync::Arc;
16
17use arrow::{
18 array::{ArrayRef, GenericStringBuilder, Int64Builder},
19 datatypes::SchemaRef,
20};
21use exon_common::ExonArrayBuilder;
22
23use super::bed_record_builder::BEDRecord;
24
25pub struct BEDArrayBuilder {
26 reference_sequence_names: GenericStringBuilder<i32>,
27 starts: Int64Builder,
28 ends: Int64Builder,
29 names: GenericStringBuilder<i32>,
30 scores: Int64Builder,
31 strands: GenericStringBuilder<i32>,
32 thick_starts: Int64Builder,
33 thick_ends: Int64Builder,
34 colors: GenericStringBuilder<i32>,
35 block_counts: Int64Builder,
36 block_sizes: GenericStringBuilder<i32>,
37 block_starts: GenericStringBuilder<i32>,
38
39 projection: Vec<usize>,
40
41 rows: usize,
42}
43
44impl BEDArrayBuilder {
45 pub fn create(schema: SchemaRef, projection: Option<Vec<usize>>) -> Self {
46 let projection = match projection {
47 Some(p) => p,
48 None => (0..schema.fields().len()).collect(),
49 };
50
51 Self {
52 reference_sequence_names: GenericStringBuilder::<i32>::new(),
53 starts: Int64Builder::new(),
54 ends: Int64Builder::new(),
55 names: GenericStringBuilder::<i32>::new(),
56 scores: Int64Builder::new(),
57 strands: GenericStringBuilder::<i32>::new(),
58 thick_starts: Int64Builder::new(),
59 thick_ends: Int64Builder::new(),
60 colors: GenericStringBuilder::<i32>::new(),
61 block_counts: Int64Builder::new(),
62 block_sizes: GenericStringBuilder::<i32>::new(),
63 block_starts: GenericStringBuilder::<i32>::new(),
64 projection,
65 rows: 0,
66 }
67 }
68
69 pub fn append(&mut self, record: BEDRecord) -> std::io::Result<()> {
70 self.rows += 1;
71
72 for col_idx in self.projection.iter() {
73 match col_idx {
74 0 => self
75 .reference_sequence_names
76 .append_value(record.reference_sequence_name()),
77 1 => self.starts.append_value(record.start() as i64),
78 2 => self.ends.append_value(record.end() as i64),
79 3 => self.names.append_option(record.name()),
80 4 => self.scores.append_option(record.score()),
81 5 => self.strands.append_option(record.strand()),
82 6 => self
83 .thick_starts
84 .append_option(record.thick_start().map(|x| x as i64)),
85 7 => self
86 .thick_ends
87 .append_option(record.thick_end().map(|x| x as i64)),
88 8 => self.colors.append_option(record.color()),
89 9 => self
90 .block_counts
91 .append_option(record.block_count().map(|x| x as i64)),
92 10 => self.block_sizes.append_option(record.block_sizes()),
93 11 => self.block_starts.append_option(record.block_starts()),
94 _ => panic!("Invalid column index"),
95 }
96 }
97
98 Ok(())
99 }
100
101 pub fn finish(&mut self) -> Vec<ArrayRef> {
102 let mut arrays: Vec<ArrayRef> = vec![];
103
104 for col_idx in self.projection.iter() {
105 match col_idx {
106 0 => arrays.push(Arc::new(self.reference_sequence_names.finish())),
107 1 => arrays.push(Arc::new(self.starts.finish())),
108 2 => arrays.push(Arc::new(self.ends.finish())),
109 3 => arrays.push(Arc::new(self.names.finish())),
110 4 => arrays.push(Arc::new(self.scores.finish())),
111 5 => arrays.push(Arc::new(self.strands.finish())),
112 6 => arrays.push(Arc::new(self.thick_starts.finish())),
113 7 => arrays.push(Arc::new(self.thick_ends.finish())),
114 8 => arrays.push(Arc::new(self.colors.finish())),
115 9 => arrays.push(Arc::new(self.block_counts.finish())),
116 10 => arrays.push(Arc::new(self.block_sizes.finish())),
117 11 => arrays.push(Arc::new(self.block_starts.finish())),
118 _ => panic!("Invalid column index"),
119 }
120 }
121
122 arrays
123 }
124}
125
126impl ExonArrayBuilder for BEDArrayBuilder {
127 fn finish(&mut self) -> Vec<ArrayRef> {
128 self.finish()
129 }
130
131 fn len(&self) -> usize {
132 self.rows
133 }
134}