1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
//! Column descriptors: extracting single, named columns from a record batch.
use std::marker::PhantomData;
use arrow::array::ArrayRef;
use crate::{Column, Datatype, DynColumn, Error, ErrorKind};
// Column descriptors
/// Names one strongly-typed column of a record batch.
///
/// `#[derive(Quiver)]` emits these as `COLUMN_*` constants (for example
/// `Measurements::COLUMN_TEMPERATURE`), so a single column can be pulled out
/// of a record batch without spelling its name as a string literal.
///
/// The type parameter `C` is the column type the descriptor stands for; no
/// value of `C` is stored.
pub struct ColumnDesc<C> {
    /// Name of the `#[derive(Quiver)]` struct; only used in error messages.
    pub record_type: &'static str,

    /// Name under which the column appears in the record batch.
    pub name: &'static str,

    /// Key/value pairs declared via `#[quiver(metadata("key" = "value", …))]`.
    pub metadata: &'static [(&'static str, &'static str)],

    // Carries `C` at the type level only; the descriptor never holds a `C`.
    _marker: PhantomData<fn() -> C>,
}
impl<L: Datatype> ColumnDesc<Column<L>> {
/// Describes the column `name` of the `#[derive(Quiver)]` struct `record_type`.
///
/// Usually not called directly: the derive generates these as `COLUMN_*` constants.
pub const fn new(
record_type: &'static str,
name: &'static str,
metadata: &'static [(&'static str, &'static str)],
) -> Self {
Self {
record_type,
name,
metadata,
_marker: PhantomData,
}
}
/// The arrow field of this column, including the declared metadata.
#[must_use]
pub fn arrow_field(&self) -> arrow::datatypes::Field {
arrow::datatypes::Field::new(self.name, L::datatype(), L::NULLABLE).with_metadata(
self.metadata
.iter()
.map(|(key, value)| ((*key).to_owned(), (*value).to_owned()))
.collect(),
)
}
/// Extracts and validates this single column of a record batch.
///
/// # Errors
/// Errors if the column is missing, has the wrong datatype, or unexpected nulls.
pub fn extract(&self, batch: &arrow::record_batch::RecordBatch) -> Result<Column<L>, Error> {
let Self {
record_type, name, ..
} = *self;
let (index, field) = batch
.schema_ref()
.column_with_name(name)
.ok_or_else(|| Error {
record_type,
kind: ErrorKind::MissingColumn {
column: name.to_owned(),
},
})?;
let column =
Column::try_new(ArrayRef::clone(batch.column(index))).map_err(|err| Error {
record_type,
kind: err.for_column(name.to_owned()),
})?;
Ok(column.with_metadata(
field
.metadata()
.iter()
.map(|(key, value)| (key.clone(), value.clone()))
.collect(),
))
}
}
/// Names one dynamically-typed column of a record batch.
///
/// `#[derive(Quiver)]` emits these as `COLUMN_*` constants for fields that
/// hold raw arrow arrays.
pub struct DynColumnDesc {
    /// Name of the `#[derive(Quiver)]` struct; only used in error messages.
    pub record_type: &'static str,

    /// Name under which the column appears in the record batch.
    pub name: &'static str,
}
impl DynColumnDesc {
/// Describes the column `name` of the `#[derive(Quiver)]` struct `record_type`.
///
/// Usually not called directly: the derive generates these as `COLUMN_*` constants.
pub const fn new(record_type: &'static str, name: &'static str) -> Self {
Self { record_type, name }
}
/// Extracts this single column of a record batch.
///
/// # Errors
/// Errors if the column is missing.
pub fn extract(&self, batch: &arrow::record_batch::RecordBatch) -> Result<DynColumn, Error> {
let Self { record_type, name } = *self;
let index = batch
.schema_ref()
.index_of(name)
.map_err(|_not_found| Error {
record_type,
kind: ErrorKind::MissingColumn {
column: name.to_owned(),
},
})?;
Ok(DynColumn {
field: std::sync::Arc::clone(&batch.schema_ref().fields()[index]),
array: ArrayRef::clone(batch.column(index)),
})
}
}