use crate::{FlightData, IpcMessage, SchemaAsIpc, SchemaResult};
use bytes::Bytes;
use std::collections::HashMap;
use std::sync::Arc;
use arrow_array::{ArrayRef, RecordBatch};
use arrow_buffer::Buffer;
use arrow_ipc::convert::fb_to_schema;
use arrow_ipc::{reader, root_as_message, writer, writer::IpcWriteOptions};
use arrow_schema::{ArrowError, Schema, SchemaRef};
#[deprecated(
since = "30.0.0",
note = "Use IpcDataGenerator directly with DictionaryTracker to avoid re-sending dictionaries"
)]
pub fn flight_data_from_arrow_batch(
batch: &RecordBatch,
options: &IpcWriteOptions,
) -> (Vec<FlightData>, FlightData) {
let data_gen = writer::IpcDataGenerator::default();
let mut dictionary_tracker = writer::DictionaryTracker::new(false);
let (encoded_dictionaries, encoded_batch) = data_gen
.encoded_batch(batch, &mut dictionary_tracker, options)
.expect("DictionaryTracker configured above to not error on replacement");
let flight_dictionaries = encoded_dictionaries.into_iter().map(Into::into).collect();
let flight_batch = encoded_batch.into();
(flight_dictionaries, flight_batch)
}
pub fn flight_data_to_batches(
flight_data: &[FlightData],
) -> Result<Vec<RecordBatch>, ArrowError> {
let schema = flight_data.get(0).ok_or_else(|| {
ArrowError::CastError("Need at least one FlightData for schema".to_string())
})?;
let message = root_as_message(&schema.data_header[..])
.map_err(|_| ArrowError::CastError("Cannot get root as message".to_string()))?;
let ipc_schema: arrow_ipc::Schema = message.header_as_schema().ok_or_else(|| {
ArrowError::CastError("Cannot get header as Schema".to_string())
})?;
let schema = fb_to_schema(ipc_schema);
let schema = Arc::new(schema);
let mut batches = vec![];
let dictionaries_by_id = HashMap::new();
for datum in flight_data[1..].iter() {
let batch =
flight_data_to_arrow_batch(datum, schema.clone(), &dictionaries_by_id)?;
batches.push(batch);
}
Ok(batches)
}
pub fn flight_data_to_arrow_batch(
data: &FlightData,
schema: SchemaRef,
dictionaries_by_id: &HashMap<i64, ArrayRef>,
) -> Result<RecordBatch, ArrowError> {
let message = arrow_ipc::root_as_message(&data.data_header[..]).map_err(|err| {
ArrowError::ParseError(format!("Unable to get root as message: {err:?}"))
})?;
message
.header_as_record_batch()
.ok_or_else(|| {
ArrowError::ParseError(
"Unable to convert flight data header to a record batch".to_string(),
)
})
.map(|batch| {
reader::read_record_batch(
&Buffer::from(&data.data_body),
batch,
schema,
dictionaries_by_id,
None,
&message.version(),
)
})?
}
#[deprecated(
since = "4.4.0",
note = "Use From trait, e.g.: SchemaAsIpc::new(schema, options).try_into()"
)]
pub fn flight_schema_from_arrow_schema(
schema: &Schema,
options: &IpcWriteOptions,
) -> Result<SchemaResult, ArrowError> {
SchemaAsIpc::new(schema, options).try_into()
}
#[deprecated(
since = "4.4.0",
note = "Use From trait, e.g.: SchemaAsIpc::new(schema, options).into()"
)]
pub fn flight_data_from_arrow_schema(
schema: &Schema,
options: &IpcWriteOptions,
) -> FlightData {
SchemaAsIpc::new(schema, options).into()
}
#[deprecated(
since = "4.4.0",
note = "Use TryFrom trait, e.g.: SchemaAsIpc::new(schema, options).try_into()"
)]
pub fn ipc_message_from_arrow_schema(
schema: &Schema,
options: &IpcWriteOptions,
) -> Result<Bytes, ArrowError> {
let message = SchemaAsIpc::new(schema, options).try_into()?;
let IpcMessage(vals) = message;
Ok(vals)
}
pub fn batches_to_flight_data(
schema: Schema,
batches: Vec<RecordBatch>,
) -> Result<Vec<FlightData>, ArrowError> {
let options = IpcWriteOptions::default();
let schema_flight_data: FlightData = SchemaAsIpc::new(&schema, &options).into();
let mut dictionaries = vec![];
let mut flight_data = vec![];
let data_gen = writer::IpcDataGenerator::default();
let mut dictionary_tracker = writer::DictionaryTracker::new(false);
for batch in batches.iter() {
let (encoded_dictionaries, encoded_batch) =
data_gen.encoded_batch(batch, &mut dictionary_tracker, &options)?;
dictionaries.extend(encoded_dictionaries.into_iter().map(Into::into));
flight_data.push(encoded_batch.into());
}
let mut stream = vec![schema_flight_data];
stream.extend(dictionaries.into_iter());
stream.extend(flight_data.into_iter());
let flight_data: Vec<_> = stream.into_iter().collect();
Ok(flight_data)
}