1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
//! Column descriptors: extracting single, named columns from a record batch.
use std::marker::PhantomData;
use arrow::array::ArrayRef;
use crate::{Column, DynColumn, Error, ErrorKind, LogicalType};
// Column descriptors
/// A typed handle that names one column of a `#[derive(Quiver)]` struct.
///
/// The derive emits these as `COLUMN_*` constants (for example
/// `Measurements::COLUMN_TEMPERATURE`), so a single column can be pulled
/// out of a record batch without hard-coding its name.
pub struct ColumnDesc<C> {
    /// Name of the `#[derive(Quiver)]` struct; used in error messages.
    pub record_type: &'static str,

    /// Name of the column inside the record batch.
    pub name: &'static str,

    /// Key/value metadata declared with `#[quiver(metadata("key" = "value", …))]`.
    pub metadata: &'static [(&'static str, &'static str)],

    // Associates the descriptor with the column type `C` without storing a `C`.
    _marker: PhantomData<fn() -> C>,
}
impl<L: LogicalType> ColumnDesc<Column<L>> {
/// Describes the column `name` of the `#[derive(Quiver)]` struct `record_type`.
///
/// Usually not called directly: the derive generates these as `COLUMN_*` constants.
pub const fn new(
record_type: &'static str,
name: &'static str,
metadata: &'static [(&'static str, &'static str)],
) -> Self {
Self {
record_type,
name,
metadata,
_marker: PhantomData,
}
}
/// Extracts and validates this single column of a record batch.
///
/// # Errors
/// Errors if the column is missing, has the wrong datatype, or unexpected nulls.
pub fn extract(&self, batch: &arrow::record_batch::RecordBatch) -> Result<Column<L>, Error> {
let Self {
record_type, name, ..
} = *self;
Column::extract_named(batch, name, record_type)
}
}
impl<L: crate::ConcreteType> ColumnDesc<Column<L>> {
    /// Builds the arrow field for this column, with the declared metadata attached.
    ///
    /// The field is named `self.name`, and takes its datatype and nullability
    /// from `L::datatype()` and `L::NULLABLE`.
    #[must_use]
    pub fn arrow_field(&self) -> arrow::datatypes::Field {
        let field = arrow::datatypes::Field::new(self.name, L::datatype(), L::NULLABLE);

        // The static `(&str, &str)` pairs become owned strings for arrow.
        let metadata = self
            .metadata
            .iter()
            .map(|&(key, value)| (key.to_owned(), value.to_owned()))
            .collect();

        field.with_metadata(metadata)
    }
}
/// A handle that names one dynamically-typed column of a `#[derive(Quiver)]` struct.
///
/// The derive emits these as `COLUMN_*` constants for fields that hold
/// raw arrow arrays.
pub struct DynColumnDesc {
    /// Name of the `#[derive(Quiver)]` struct; used in error messages.
    pub record_type: &'static str,

    /// Name of the column inside the record batch.
    pub name: &'static str,
}
impl DynColumnDesc {
/// Describes the column `name` of the `#[derive(Quiver)]` struct `record_type`.
///
/// Usually not called directly: the derive generates these as `COLUMN_*` constants.
pub const fn new(record_type: &'static str, name: &'static str) -> Self {
Self { record_type, name }
}
/// Extracts this single column of a record batch.
///
/// # Errors
/// Errors if the column is missing.
pub fn extract(&self, batch: &arrow::record_batch::RecordBatch) -> Result<DynColumn, Error> {
let Self { record_type, name } = *self;
let index = batch
.schema_ref()
.index_of(name)
.map_err(|_not_found| Error {
record_type,
kind: ErrorKind::MissingColumn {
column: name.to_owned(),
},
})?;
Ok(DynColumn {
field: std::sync::Arc::clone(&batch.schema_ref().fields()[index]),
array: ArrayRef::clone(batch.column(index)),
})
}
}