Manifest

Struct Manifest 

Source
pub struct Manifest {
Show 19 fields pub schema: Schema, pub version: u64, pub branch: Option<String>, pub writer_version: Option<WriterVersion>, pub fragments: Arc<Vec<Fragment>>, pub version_aux_data: usize, pub index_section: Option<usize>, pub timestamp_nanos: u128, pub tag: Option<String>, pub reader_feature_flags: u64, pub writer_feature_flags: u64, pub max_fragment_id: Option<u32>, pub transaction_file: Option<String>, pub transaction_section: Option<usize>, pub next_row_id: u64, pub data_storage_format: DataStorageFormat, pub config: HashMap<String, String>, pub table_metadata: HashMap<String, String>, pub base_paths: HashMap<u32, BasePath>, /* private fields */
}
Expand description

Manifest of a dataset

  • Schema
  • Version
  • Fragments
  • Indices

Fields§

§schema: Schema

Dataset schema.

§version: u64

Dataset version

§branch: Option<String>

Branch name, None if the dataset is the main branch.

§writer_version: Option<WriterVersion>

Version of the writer library that wrote this manifest.

§fragments: Arc<Vec<Fragment>>

Fragments, the pieces to build the dataset.

This list is stored in order, sorted by fragment id. However, the fragment id sequence may have gaps.

§version_aux_data: usize

The file position of the version aux data.

§index_section: Option<usize>

The file position of the index metadata.

§timestamp_nanos: u128

The creation timestamp with nanosecond resolution, stored as a 128-bit integer.

§tag: Option<String>

An optional string tag for this version

§reader_feature_flags: u64

The reader flags

§writer_feature_flags: u64

The writer flags

§max_fragment_id: Option<u32>

The max fragment id used so far. None means it was never set; Some(0) means the max ID used so far is 0.

§transaction_file: Option<String>

The path to the transaction file, relative to the root of the dataset

§transaction_section: Option<usize>

The file position of the inline transaction content inside the manifest

§next_row_id: u64

The next row id to assign; every row id used so far is below this value.

§data_storage_format: DataStorageFormat

The storage format of the data files.

§config: HashMap<String, String>

Table configuration.

§table_metadata: HashMap<String, String>

Table metadata.

This is a key-value map that can be used to store arbitrary metadata associated with the table. This is different than configuration, which is used to tell libraries how to read, write, or manage the table.

§base_paths: HashMap<u32, BasePath>

Implementations§

Source§

impl Manifest

Source

pub fn new( schema: Schema, fragments: Arc<Vec<Fragment>>, data_storage_format: DataStorageFormat, base_paths: HashMap<u32, BasePath>, ) -> Self

Source

pub fn new_from_previous( previous: &Self, schema: Schema, fragments: Arc<Vec<Fragment>>, ) -> Self

Source

pub fn shallow_clone( &self, ref_name: Option<String>, ref_path: String, ref_base_id: u32, branch_name: Option<String>, transaction_file: String, ) -> Self

Performs a shallow clone of the manifest entirely in memory, without:

  • Any persistent storage operations
  • Modifications to the original data

If the shallow clone is for a branch, ref_name is the source branch.
Source

pub fn timestamp(&self) -> DateTime<Utc>

Return the timestamp_nanos value as a Utc DateTime

Source

pub fn set_timestamp(&mut self, nanos: u128)

Set the timestamp_nanos value from the given nanosecond timestamp (u128).

Source

pub fn config_mut(&mut self) -> &mut HashMap<String, String>

Get a mutable reference to the config

Source

pub fn table_metadata_mut(&mut self) -> &mut HashMap<String, String>

Get a mutable reference to the table metadata

Source

pub fn schema_metadata_mut(&mut self) -> &mut HashMap<String, String>

Get a mutable reference to the schema metadata

Source

pub fn field_metadata_mut( &mut self, field_id: i32, ) -> Option<&mut HashMap<String, String>>

Get a mutable reference to the field metadata for a specific field id

Returns None if the field does not exist in the schema.

Source

pub fn update_config( &mut self, upsert_values: impl IntoIterator<Item = (String, String)>, )

👎Deprecated: Use config_mut() for direct access to config HashMap

Upsert config entries from an iterator of (key, value) pairs.

Source

pub fn delete_config_keys(&mut self, delete_keys: &[&str])

👎Deprecated: Use config_mut() for direct access to config HashMap

Delete config keys using a slice of keys

Source

pub fn replace_schema_metadata(&mut self, new_metadata: HashMap<String, String>)

👎Deprecated: Use schema_metadata_mut() for direct access to schema metadata HashMap

Replaces the schema metadata with the given key-value pairs.

Source

pub fn replace_field_metadata( &mut self, field_id: i32, new_metadata: HashMap<String, String>, ) -> Result<()>

👎Deprecated: Use field_metadata_mut(field_id) for direct access to field metadata HashMap

Replaces the metadata of the field with the given id with the given key-value pairs.

If the field does not exist in the schema, this is a no-op.

Source

pub fn update_max_fragment_id(&mut self)

Check the current fragment list and update the high water mark

Source

pub fn max_fragment_id(&self) -> Option<u64>

Return the max fragment id. Note this does not support recycling of fragment ids.

This will return None if there are no fragments and max_fragment_id was never set.

Source

pub fn max_field_id(&self) -> i32

Get the max used field id

This is different than Schema::max_field_id because it also considers the field ids in the data files that have been dropped from the schema.

Source

pub fn fragments_since(&self, since: &Self) -> Result<Vec<Fragment>>

Return the fragments that are newer than the given manifest. Note this does not support recycling of fragment ids.

Source

pub fn fragments_by_offset_range( &self, range: Range<usize>, ) -> Vec<(usize, &Fragment)>

Find the fragments that contain the rows, identified by the offset range.

Note that the offsets are the logical offsets of rows, not row IDs.

§Parameters

range: Range<usize> — the offset range.

§Returns

Vec<(usize, &Fragment)> — a vector of (starting_offset_of_fragment, fragment) pairs.

Source

pub fn uses_stable_row_ids(&self) -> bool

Whether the dataset uses stable row ids.

Source

pub fn serialized(&self) -> Vec<u8>

Creates a serialized copy of the manifest, suitable for IPC or temporary storage, that can be used to create a dataset.

Source

pub fn should_use_legacy_format(&self) -> bool

Source

pub fn summary(&self) -> ManifestSummary

Get the summary information of a manifest.

This function calculates various statistics about the manifest, including:

  • total_files_size: Total size of all data files in bytes
  • total_fragments: Total number of fragments in the dataset
  • total_data_files: Total number of data files across all fragments
  • total_deletion_files: Total number of deletion files
  • total_data_file_rows: Total number of rows in data files
  • total_deletion_file_rows: Total number of deleted rows in deletion files
  • total_rows: Total number of rows in the dataset

Trait Implementations§

Source§

impl Clone for Manifest

Source§

fn clone(&self) -> Manifest

Returns a duplicate of the value. Read more
1.0.0§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for Manifest

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl DeepSizeOf for Manifest

Source§

fn deep_size_of_children(&self, context: &mut Context) -> usize

Returns an estimation of the heap-managed storage of this object. This does not include the size of the object itself. Read more
Source§

fn deep_size_of(&self) -> usize

Returns an estimation of a total size of memory owned by the object, including heap-managed storage. Read more
Source§

impl From<&Manifest> for Manifest

Source§

fn from(m: &Manifest) -> Self

Converts to this type from the input type.
Source§

impl PartialEq for Manifest

Source§

fn eq(&self, other: &Manifest) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl ProtoStruct for Manifest

Source§

impl TryFrom<Manifest> for Manifest

Source§

type Error = Error

The type returned in the event of a conversion error.
Source§

fn try_from(p: Manifest) -> Result<Self>

Performs the conversion.
Source§

impl StructuralPartialEq for Manifest

Auto Trait Implementations§

Blanket Implementations§

§

impl<T> Any for T
where T: 'static + ?Sized,

§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
§

impl<T> Borrow<T> for T
where T: ?Sized,

§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
§

impl<T> BorrowMut<T> for T
where T: ?Sized,

§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
§

impl<T> CloneToUninit for T
where T: Clone,

§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
§

impl<T> From<T> for T

§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
§

impl<T, U> Into<U> for T
where U: From<T>,

§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of the pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> PolicyExt for T
where T: ?Sized,

Source§

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more
Source§

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more
§

impl<T> ToOwned for T
where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more