1use std::sync::Arc;
16
17use arrow::datatypes::{DataType, Field, Schema};
18use exon_common::TableSchema;
19
20use crate::{ExonBEDError, ExonBEDResult};
21
22pub struct BEDSchemaBuilder {
23 file_fields: Vec<Field>,
24 partition_fields: Vec<Field>,
25}
26
27fn file_fields(n_fields: usize) -> ExonBEDResult<Vec<Field>> {
28 if !(3..=12).contains(&n_fields) {
29 return Err(ExonBEDError::InvalidNumberOfFields(n_fields));
30 }
31
32 let field_fields = vec![
33 Field::new("reference_sequence_name", DataType::Utf8, false),
34 Field::new("start", DataType::Int64, false),
35 Field::new("end", DataType::Int64, false),
36 Field::new("name", DataType::Utf8, true),
37 Field::new("score", DataType::Int64, true),
38 Field::new("strand", DataType::Utf8, true),
39 Field::new("thick_start", DataType::Int64, true),
40 Field::new("thick_end", DataType::Int64, true),
41 Field::new("color", DataType::Utf8, true),
42 Field::new("block_count", DataType::Int64, true),
43 Field::new("block_sizes", DataType::Utf8, true),
44 Field::new("block_starts", DataType::Utf8, true),
45 ];
46
47 Ok(field_fields[0..n_fields].to_vec())
48}
49
50impl BEDSchemaBuilder {
51 pub fn new(file_fields: Vec<Field>, partition_fields: Vec<Field>) -> Self {
52 Self {
53 file_fields,
54 partition_fields,
55 }
56 }
57
58 pub fn add_partition_fields(&mut self, fields: Vec<Field>) {
59 self.partition_fields.extend(fields);
60 }
61
62 pub fn build(self) -> TableSchema {
64 let mut fields = self.file_fields.clone();
65 fields.extend_from_slice(&self.partition_fields);
66
67 let schema = Schema::new(fields);
68
69 let projection = (0..self.file_fields.len()).collect::<Vec<_>>();
70
71 TableSchema::new(Arc::new(schema), projection)
72 }
73
74 pub fn with_n_fields(n_fields: usize) -> ExonBEDResult<Self> {
76 let field_fields = file_fields(n_fields)?;
77
78 Ok(Self::new(field_fields, vec![]))
79 }
80}
81
82impl Default for BEDSchemaBuilder {
83 fn default() -> Self {
84 let field_fields = file_fields(12).unwrap();
85 Self::new(field_fields, vec![])
86 }
87}