Struct IcebergReaderConfig

Source

pub struct IcebergReaderConfig {
    pub catalog_type: Option<IcebergCatalogType>,
    pub datetime: Option<String>,
    pub glue_access_key_id: Option<String>,
    pub glue_endpoint: Option<String>,
    pub glue_id: Option<String>,
    pub glue_profile_name: Option<String>,
    pub glue_region: Option<String>,
    pub glue_secret_access_key: Option<String>,
    pub glue_session_token: Option<String>,
    pub glue_warehouse: Option<String>,
    pub metadata_location: Option<String>,
    pub mode: IcebergIngestMode,
    pub rest_audience: Option<String>,
    pub rest_credential: Option<String>,
    pub rest_headers: Option<IcebergReaderConfigRestHeaders>,
    pub rest_oauth2_server_uri: Option<String>,
    pub rest_prefix: Option<String>,
    pub rest_resource: Option<String>,
    pub rest_scope: Option<String>,
    pub rest_token: Option<String>,
    pub rest_uri: Option<String>,
    pub rest_warehouse: Option<String>,
    pub snapshot_filter: Option<String>,
    pub snapshot_id: Option<i64>,
    pub table_name: Option<String>,
    pub timestamp_column: Option<String>,
    pub extra: HashMap<String, String>,
}

Expand description

Iceberg input connector configuration.

JSON schema

{
 "description": "Iceberg input connector configuration.",
 "allOf": [
   {
     "$ref": "#/components/schemas/GlueCatalogConfig"
   },
   {
     "$ref": "#/components/schemas/RestCatalogConfig"
   },
   {
     "type": "object",
     "required": [
       "mode"
     ],
     "properties": {
       "catalog_type": {
         "allOf": [
           {
             "$ref": "#/components/schemas/IcebergCatalogType"
           }
         ]
       },
       "datetime": {
         "description": "Optional timestamp for the snapshot in the ISO-8601/RFC-3339 format, e.g.,\n\"2024-12-09T16:09:53+00:00\".\n\nWhen this option is set, the connector finds and opens the snapshot of the table as of the\nspecified point in time (based on the server time recorded in the transaction\nlog, not the event time encoded in the data).  In `snapshot` and `snapshot_and_follow`\nmodes, it retrieves this snapshot.  In `follow` and `snapshot_and_follow` modes, it\nfollows transaction log records **after** this snapshot.\n\nNote: at most one of `snapshot_id` and `datetime` options can be specified.\nWhen neither of the two options is specified, the latest committed version of the table\nis used.",
         "type": [
           "string",
           "null"
         ]
       },
       "metadata_location": {
         "description": "Location of the table metadata JSON file.\n\nThis property is used to access an Iceberg table without a catalog. It is mutually\nexclusive with the `catalog_type` property.",
         "type": [
           "string",
           "null"
         ]
       },
       "mode": {
         "$ref": "#/components/schemas/IcebergIngestMode"
       },
       "snapshot_filter": {
         "description": "Optional row filter.\n\nThis option is only valid when `mode` is set to `snapshot` or `snapshot_and_follow`.\n\nWhen specified, only rows that satisfy the filter condition are included in the\nsnapshot.  The condition must be a valid SQL Boolean expression that can be used in\nthe `where` clause of the `select * from snapshot where ...` query.\n\nThis option can be used to specify the range of event times to include in the snapshot,\ne.g.: `ts BETWEEN '2005-01-01 00:00:00' AND '2010-12-31 23:59:59'`.",
         "type": [
           "string",
           "null"
         ]
       },
       "snapshot_id": {
         "description": "Optional snapshot id.\n\nWhen this option is set, the connector finds the specified snapshot of the table.\nIn `snapshot` and `snapshot_and_follow` modes, it loads this snapshot.\nIn `follow` and `snapshot_and_follow` modes, it follows table updates\n**after** this snapshot.\n\nNote: at most one of `snapshot_id` and `datetime` options can be specified.\nWhen neither of the two options is specified, the latest committed version of the table\nis used.",
         "type": [
           "integer",
           "null"
         ],
         "format": "int64"
       },
       "table_name": {
         "description": "Specifies the Iceberg table name in the \"namespace.table\" format.\n\nThis option is applicable when an Iceberg catalog is configured using the `catalog_type` property.",
         "type": [
           "string",
           "null"
         ]
       },
       "timestamp_column": {
         "description": "Table column that serves as an event timestamp.\n\nWhen this option is specified, and `mode` is one of `snapshot` or `snapshot_and_follow`,\ntable rows are ingested in the timestamp order, respecting the\n[`LATENESS`](https://docs.feldera.com/sql/streaming#lateness-expressions)\nproperty of the column: each ingested row has a timestamp no more than `LATENESS`\ntime units earlier than the most recent timestamp of any previously ingested row.\nThe ingestion is performed by partitioning the table into timestamp ranges of width\n`LATENESS`. Each range is processed sequentially, in increasing timestamp order.\n\n# Example\n\nConsider a table with timestamp column of type `TIMESTAMP` and lateness attribute\n`INTERVAL 1 DAY`. Assuming that the oldest timestamp in the table is\n`2024-01-01T00:00:00`, the connector will fetch all records with timestamps\nfrom `2024-01-01`, then all records for `2024-01-02`, `2024-01-03`, etc., until all records\nin the table have been ingested.\n\n# Requirements\n\n* The timestamp column must be of a supported type: integer, `DATE`, or `TIMESTAMP`.\n* The timestamp column must be declared with non-zero `LATENESS`.\n* For efficient ingest, the table must be optimized for timestamp-based\nqueries using partitioning, Z-ordering, or liquid clustering.",
         "type": [
           "string",
           "null"
         ]
       }
     },
     "additionalProperties": {
       "description": "Storage options for configuring backend object store.\n\nSee the [list of available options in PyIceberg documentation](https://py.iceberg.apache.org/configuration/#fileio).",
       "type": "string"
     }
   }
 ]
}

Fields§

§catalog_type: Option<IcebergCatalogType>

§datetime: Option<String>

Optional timestamp for the snapshot in the ISO-8601/RFC-3339 format, e.g., “2024-12-09T16:09:53+00:00”.

When this option is set, the connector finds and opens the snapshot of the table as of the specified point in time (based on the server time recorded in the transaction log, not the event time encoded in the data). In snapshot and snapshot_and_follow modes, it retrieves this snapshot. In follow and snapshot_and_follow modes, it follows transaction log records after this snapshot.

Note: at most one of snapshot_id and datetime options can be specified. When neither of the two options is specified, the latest committed version of the table is used.

§glue_access_key_id: Option<String>

§glue_endpoint: Option<String>

§glue_id: Option<String>

§glue_profile_name: Option<String>

§glue_region: Option<String>

§glue_secret_access_key: Option<String>

§glue_session_token: Option<String>

§glue_warehouse: Option<String>

§metadata_location: Option<String>

Location of the table metadata JSON file.

This property is used to access an Iceberg table without a catalog. It is mutually exclusive with the catalog_type property.

§mode: IcebergIngestMode

§rest_audience: Option<String>

§rest_credential: Option<String>

§rest_headers: Option<IcebergReaderConfigRestHeaders>

§rest_oauth2_server_uri: Option<String>

§rest_prefix: Option<String>

§rest_resource: Option<String>

§rest_scope: Option<String>

§rest_token: Option<String>

§rest_uri: Option<String>

§rest_warehouse: Option<String>

§snapshot_filter: Option<String>

Optional row filter.

This option is only valid when mode is set to snapshot or snapshot_and_follow.

When specified, only rows that satisfy the filter condition are included in the snapshot. The condition must be a valid SQL Boolean expression that can be used in the where clause of the select * from snapshot where ... query.

This option can be used to specify the range of event times to include in the snapshot, e.g.: ts BETWEEN '2005-01-01 00:00:00' AND '2010-12-31 23:59:59'.

§snapshot_id: Option<i64>

Optional snapshot id.

When this option is set, the connector finds the specified snapshot of the table. In snapshot and snapshot_and_follow modes, it loads this snapshot. In follow and snapshot_and_follow modes, it follows table updates after this snapshot.

Note: at most one of snapshot_id and datetime options can be specified. When neither of the two options is specified, the latest committed version of the table is used.

§table_name: Option<String>

Specifies the Iceberg table name in the “namespace.table” format.

This option is applicable when an Iceberg catalog is configured using the catalog_type property.

§timestamp_column: Option<String>

Table column that serves as an event timestamp.

When this option is specified, and mode is one of snapshot or snapshot_and_follow, table rows are ingested in the timestamp order, respecting the LATENESS property of the column: each ingested row has a timestamp no more than LATENESS time units earlier than the most recent timestamp of any previously ingested row. The ingestion is performed by partitioning the table into timestamp ranges of width LATENESS. Each range is processed sequentially, in increasing timestamp order.

§Example

Consider a table with timestamp column of type TIMESTAMP and lateness attribute INTERVAL 1 DAY. Assuming that the oldest timestamp in the table is 2024-01-01T00:00:00, the connector will fetch all records with timestamps from 2024-01-01, then all records for 2024-01-02, 2024-01-03, etc., until all records in the table have been ingested.

§Requirements

- The timestamp column must be of a supported type: integer, DATE, or TIMESTAMP.
- The timestamp column must be declared with non-zero LATENESS.
- For efficient ingest, the table must be optimized for timestamp-based queries using partitioning, Z-ordering, or liquid clustering.

§extra: HashMap<String, String>

Struct IcebergReaderConfig

Fields§

§Example

§Requirements

Implementations§

impl IcebergReaderConfig

pub fn builder() -> IcebergReaderConfig

Trait Implementations§

impl Clone for IcebergReaderConfig

fn clone(&self) -> IcebergReaderConfig

fn clone_from(&mut self, source: &Self)

impl Debug for IcebergReaderConfig

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl<'de> Deserialize<'de> for IcebergReaderConfig

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where __D: Deserializer<'de>,

impl From<&IcebergReaderConfig> for IcebergReaderConfig

fn from(value: &IcebergReaderConfig) -> Self

impl From<IcebergReaderConfig> for IcebergReaderConfig

fn from(value: IcebergReaderConfig) -> Self

impl Serialize for IcebergReaderConfig

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error> where __S: Serializer,

impl TryFrom<IcebergReaderConfig> for IcebergReaderConfig

type Error = ConversionError

fn try_from(value: IcebergReaderConfig) -> Result<Self, ConversionError>

Auto Trait Implementations§

impl Freeze for IcebergReaderConfig

impl RefUnwindSafe for IcebergReaderConfig

impl Send for IcebergReaderConfig

impl Sync for IcebergReaderConfig

impl Unpin for IcebergReaderConfig

impl UnwindSafe for IcebergReaderConfig

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> PolicyExt for T where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P> where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P> where T: Policy<B, E>, P: Policy<B, E>,

impl<T> Same for T

type Output = T

impl<T> Serialize for T where T: Serialize + ?Sized,

fn erased_serialize(&self, serializer: &mut dyn Serializer) -> Result<Ok, Error>

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de>,

impl<T> ErasedDestructor for T where T: 'static,

Struct IcebergReaderConfig

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

impl<T> Serialize for T
where T: Serialize + ?Sized,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,

impl<T> ErasedDestructor for T
where T: 'static,