exon_sdf/
schema_builder.rs1use std::sync::Arc;
16
17use arrow::datatypes::{Field, Schema};
18use exon_common::TableSchema;
19
20use crate::record::Data;
21
22pub struct SDFSchemaBuilder {
24 file_fields: Vec<Field>,
25 partition_fields: Vec<Field>,
26}
27
28impl Default for SDFSchemaBuilder {
29 fn default() -> Self {
30 let data_fields = vec![Field::new(
32 "canonical_smiles",
33 arrow::datatypes::DataType::Utf8,
34 false,
35 )];
36 let struct_type = arrow::datatypes::DataType::Struct(data_fields.into());
37
38 let file_fields = vec![
39 Field::new("header", arrow::datatypes::DataType::Utf8, false),
41 Field::new("atom_count", arrow::datatypes::DataType::UInt32, false),
43 Field::new("bond_count", arrow::datatypes::DataType::UInt32, false),
45 Field::new("data", struct_type, false),
47 ];
48
49 Self {
50 file_fields,
51 partition_fields: Vec::new(),
52 }
53 }
54}
55
56impl SDFSchemaBuilder {
57 pub fn new() -> Self {
59 SDFSchemaBuilder {
60 file_fields: Vec::new(),
61 partition_fields: Vec::new(),
62 }
63 }
64
65 pub fn add_field(&mut self, field: Field) {
67 self.file_fields.push(field);
68 }
69
70 pub fn add_partition_field(&mut self, field: Field) {
72 self.partition_fields.push(field);
73 }
74
75 pub fn update_data_field(&mut self, data: &Data) {
77 let new_fields = data
78 .into_iter()
79 .map(|d| Field::new(d.header(), arrow::datatypes::DataType::Utf8, true))
80 .collect::<Vec<_>>();
81
82 let struct_type = arrow::datatypes::DataType::Struct(new_fields.into());
83 self.file_fields[3] = Field::new("data", struct_type, false);
84 }
85
86 pub fn build(self) -> TableSchema {
88 let mut fields = self.file_fields.clone();
89 fields.extend_from_slice(&self.partition_fields);
90
91 let schema = Schema::new(fields);
92
93 let projection = (0..self.file_fields.len()).collect::<Vec<_>>();
94
95 TableSchema::new(Arc::new(schema), projection)
96 }
97}