re_sorbet/
lib.rs

1//! Rerun arrow metadata and record batch definitions.
2//!
//! Handles the structure of arrow record batches and their metadata for different use cases for Rerun.
4//!
5//! An arrow record batch that follows a specific schema is called a [`SorbetBatch`].
6//!
//! There is also [`ChunkBatch`], which has even more constrained requirements.
8//! Every [`ChunkBatch`] is a [`SorbetBatch`], but the opposite does not hold.
9//!
10//! Each batch type has a matching schema type:
11//! * [`SorbetBatch`] has a [`SorbetSchema`] with [`SorbetColumnDescriptors`]
12//! * [`ChunkBatch`] has a [`ChunkSchema`] with [`ChunkColumnDescriptors`]
13
14mod chunk_batch;
15mod chunk_columns;
16mod chunk_schema;
17mod column_descriptor;
18mod column_descriptor_ref;
19mod column_kind;
20mod component_column_descriptor;
21mod error;
22mod index_column_descriptor;
23mod ipc;
24mod metadata;
25mod migrations;
26mod row_id_column_descriptor;
27mod schema_builder;
28mod selectors;
29mod sorbet_batch;
30mod sorbet_columns;
31mod sorbet_schema;
32pub mod timestamp_metadata;
33
34use arrow::array::RecordBatch;
35
36pub use self::{
37    chunk_batch::{ChunkBatch, MismatchedChunkSchemaError},
38    chunk_columns::ChunkColumnDescriptors,
39    chunk_schema::ChunkSchema,
40    column_descriptor::{ColumnDescriptor, ColumnError},
41    column_descriptor_ref::ColumnDescriptorRef,
42    column_kind::{ColumnKind, UnknownColumnKind},
43    component_column_descriptor::ComponentColumnDescriptor,
44    error::SorbetError,
45    index_column_descriptor::{IndexColumnDescriptor, UnsupportedTimeType},
46    ipc::{ipc_from_schema, migrated_schema_from_ipc, raw_schema_from_ipc},
47    metadata::{
48        ArrowBatchMetadata, ArrowFieldMetadata, MetadataExt, MissingFieldMetadata,
49        MissingMetadataKey,
50    },
51    migrations::{migrate_record_batch, migrate_schema_ref},
52    row_id_column_descriptor::{RowIdColumnDescriptor, WrongDatatypeError},
53    schema_builder::SchemaBuilder,
54    selectors::{
55        ColumnSelector, ColumnSelectorParseError, ComponentColumnSelector, TimeColumnSelector,
56    },
57    sorbet_batch::SorbetBatch,
58    sorbet_columns::{ColumnSelectorResolveError, SorbetColumnDescriptors},
59    sorbet_schema::SorbetSchema,
60    timestamp_metadata::TimestampMetadata,
61};
62
/// Discriminates between the kinds of [`SorbetBatch`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BatchType {
    /// The batch holds data for a single entity.
    Chunk,

    /// The batch may hold data for several entities.
    Dataframe,
}
72
73/// Get the chunk ID from the metadata of the Arrow schema
74/// of a record batch containing a sorbet chunk.
75///
76/// Returns one of:
77/// * `Ok`
78/// * [`SorbetError::MissingChunkId`]
79/// * [`SorbetError::ChunkIdDeserializationError`]
80// TODO(#10343): remove this
81pub fn chunk_id_of_schema(
82    schema: &arrow::datatypes::Schema,
83) -> Result<re_types_core::ChunkId, SorbetError> {
84    let metadata = schema.metadata();
85    if let Some(chunk_id_str) = metadata
86        .get("rerun:id")
87        .or_else(|| metadata.get("rerun.id"))
88    {
89        chunk_id_str.parse().map_err(|err| {
90            SorbetError::ChunkIdDeserializationError(format!(
91                "Failed to deserialize chunk id {chunk_id_str:?}: {err}"
92            ))
93        })
94    } else {
95        Err(SorbetError::MissingChunkId)
96    }
97}
98
99/// If this is a [`ChunkBatch`]: does it contain static data?
100// TODO(#10343): remove this
101pub fn is_static_chunk(batch: &RecordBatch) -> Option<bool> {
102    re_tracing::profile_function!();
103    ChunkBatch::try_from(batch)
104        .ok()
105        .map(|chunk| chunk.is_static())
106}