pub struct IcebergReaderConfig {
pub catalog_type: Option<IcebergCatalogType>,
pub datetime: Option<String>,
pub glue_access_key_id: Option<String>,
pub glue_endpoint: Option<String>,
pub glue_id: Option<String>,
pub glue_profile_name: Option<String>,
pub glue_region: Option<String>,
pub glue_secret_access_key: Option<String>,
pub glue_session_token: Option<String>,
pub glue_warehouse: Option<String>,
pub metadata_location: Option<String>,
pub mode: IcebergIngestMode,
pub rest_audience: Option<String>,
pub rest_credential: Option<String>,
pub rest_headers: Option<IcebergReaderConfigRestHeaders>,
pub rest_oauth2_server_uri: Option<String>,
pub rest_prefix: Option<String>,
pub rest_resource: Option<String>,
pub rest_scope: Option<String>,
pub rest_token: Option<String>,
pub rest_uri: Option<String>,
pub rest_warehouse: Option<String>,
pub snapshot_filter: Option<String>,
pub snapshot_id: Option<i64>,
pub table_name: Option<String>,
pub timestamp_column: Option<String>,
pub extra: HashMap<String, String>,
}
Expand description
Iceberg input connector configuration.
JSON schema
{
"description": "Iceberg input connector configuration.",
"allOf": [
{
"$ref": "#/components/schemas/GlueCatalogConfig"
},
{
"$ref": "#/components/schemas/RestCatalogConfig"
},
{
"type": "object",
"required": [
"mode"
],
"properties": {
"catalog_type": {
"allOf": [
{
"$ref": "#/components/schemas/IcebergCatalogType"
}
]
},
"datetime": {
"description": "Optional timestamp for the snapshot in the ISO-8601/RFC-3339 format, e.g.,\n\"2024-12-09T16:09:53+00:00\".\n\nWhen this option is set, the connector finds and opens the snapshot of the table as of the\nspecified point in time (based on the server time recorded in the transaction\nlog, not the event time encoded in the data). In `snapshot` and `snapshot_and_follow`\nmodes, it retrieves this snapshot. In `follow` and `snapshot_and_follow` modes, it\nfollows transaction log records **after** this snapshot.\n\nNote: at most one of `snapshot_id` and `datetime` options can be specified.\nWhen neither of the two options is specified, the latest committed version of the table\nis used.",
"type": [
"string",
"null"
]
},
"metadata_location": {
"description": "Location of the table metadata JSON file.\n\nThis property is used to access an Iceberg table without a catalog. It is mutually\nexclusive with the `catalog_type` property.",
"type": [
"string",
"null"
]
},
"mode": {
"$ref": "#/components/schemas/IcebergIngestMode"
},
"snapshot_filter": {
"description": "Optional row filter.\n\nThis option is only valid when `mode` is set to `snapshot` or `snapshot_and_follow`.\n\nWhen specified, only rows that satisfy the filter condition are included in the\nsnapshot. The condition must be a valid SQL Boolean expression that can be used in\nthe `where` clause of the `select * from snapshot where ...` query.\n\nThis option can be used to specify the range of event times to include in the snapshot,\ne.g.: `ts BETWEEN '2005-01-01 00:00:00' AND '2010-12-31 23:59:59'`.",
"type": [
"string",
"null"
]
},
"snapshot_id": {
"description": "Optional snapshot id.\n\nWhen this option is set, the connector finds the specified snapshot of the table.\nIn `snapshot` and `snapshot_and_follow` modes, it loads this snapshot.\nIn `follow` and `snapshot_and_follow` modes, it follows table updates\n**after** this snapshot.\n\nNote: at most one of `snapshot_id` and `datetime` options can be specified.\nWhen neither of the two options is specified, the latest committed version of the table\nis used.",
"type": [
"integer",
"null"
],
"format": "int64"
},
"table_name": {
"description": "Specifies the Iceberg table name in the \"namespace.table\" format.\n\nThis option is applicable when an Iceberg catalog is configured using the `catalog_type` property.",
"type": [
"string",
"null"
]
},
"timestamp_column": {
"description": "Table column that serves as an event timestamp.\n\nWhen this option is specified, and `mode` is one of `snapshot` or `snapshot_and_follow`,\ntable rows are ingested in the timestamp order, respecting the\n[`LATENESS`](https://docs.feldera.com/sql/streaming#lateness-expressions)\nproperty of the column: each ingested row has a timestamp no more than `LATENESS`\ntime units earlier than the most recent timestamp of any previously ingested row.\nThe ingestion is performed by partitioning the table into timestamp ranges of width\n`LATENESS`. Each range is processed sequentially, in increasing timestamp order.\n\n# Example\n\nConsider a table with timestamp column of type `TIMESTAMP` and lateness attribute\n`INTERVAL 1 DAY`. Assuming that the oldest timestamp in the table is\n`2024-01-01T00:00:00`, the connector will fetch all records with timestamps\nfrom `2024-01-01`, then all records for `2024-01-02`, `2024-01-03`, etc., until all records\nin the table have been ingested.\n\n# Requirements\n\n* The timestamp column must be of a supported type: integer, `DATE`, or `TIMESTAMP`.\n* The timestamp column must be declared with non-zero `LATENESS`.\n* For efficient ingest, the table must be optimized for timestamp-based\nqueries using partitioning, Z-ordering, or liquid clustering.",
"type": [
"string",
"null"
]
}
},
"additionalProperties": {
"description": "Storage options for configuring backend object store.\n\nSee the [list of available options in PyIceberg documentation](https://py.iceberg.apache.org/configuration/#fileio).",
"type": "string"
}
}
]
}
Fields§
§catalog_type: Option<IcebergCatalogType>
§datetime: Option<String>
Optional timestamp for the snapshot in the ISO-8601/RFC-3339 format, e.g., “2024-12-09T16:09:53+00:00”.
When this option is set, the connector finds and opens the snapshot of the table as of the
specified point in time (based on the server time recorded in the transaction
log, not the event time encoded in the data). In `snapshot` and `snapshot_and_follow`
modes, it retrieves this snapshot. In `follow` and `snapshot_and_follow` modes, it
follows transaction log records **after** this snapshot.
Note: at most one of `snapshot_id` and `datetime` options can be specified.
When neither of the two options is specified, the latest committed version of the table
is used.
glue_access_key_id: Option<String>
§glue_endpoint: Option<String>
§glue_id: Option<String>
§glue_profile_name: Option<String>
§glue_region: Option<String>
§glue_secret_access_key: Option<String>
§glue_session_token: Option<String>
§glue_warehouse: Option<String>
§metadata_location: Option<String>
Location of the table metadata JSON file.
This property is used to access an Iceberg table without a catalog. It is mutually
exclusive with the `catalog_type` property.
mode: IcebergIngestMode
§rest_audience: Option<String>
§rest_credential: Option<String>
§rest_headers: Option<IcebergReaderConfigRestHeaders>
§rest_oauth2_server_uri: Option<String>
§rest_prefix: Option<String>
§rest_resource: Option<String>
§rest_scope: Option<String>
§rest_token: Option<String>
§rest_uri: Option<String>
§rest_warehouse: Option<String>
§snapshot_filter: Option<String>
Optional row filter.
This option is only valid when `mode` is set to `snapshot` or `snapshot_and_follow`.
When specified, only rows that satisfy the filter condition are included in the
snapshot. The condition must be a valid SQL Boolean expression that can be used in
the `where` clause of the `select * from snapshot where ...` query.
This option can be used to specify the range of event times to include in the snapshot,
e.g.: `ts BETWEEN '2005-01-01 00:00:00' AND '2010-12-31 23:59:59'`.
snapshot_id: Option<i64>
Optional snapshot id.
When this option is set, the connector finds the specified snapshot of the table.
In `snapshot` and `snapshot_and_follow` modes, it loads this snapshot.
In `follow` and `snapshot_and_follow` modes, it follows table updates
**after** this snapshot.
Note: at most one of `snapshot_id` and `datetime` options can be specified.
When neither of the two options is specified, the latest committed version of the table
is used.
table_name: Option<String>
Specifies the Iceberg table name in the “namespace.table” format.
This option is applicable when an Iceberg catalog is configured using the `catalog_type` property.
timestamp_column: Option<String>
Table column that serves as an event timestamp.
When this option is specified, and `mode` is one of `snapshot` or `snapshot_and_follow`,
table rows are ingested in the timestamp order, respecting the
[`LATENESS`](https://docs.feldera.com/sql/streaming#lateness-expressions)
property of the column: each ingested row has a timestamp no more than `LATENESS`
time units earlier than the most recent timestamp of any previously ingested row.
The ingestion is performed by partitioning the table into timestamp ranges of width
`LATENESS`. Each range is processed sequentially, in increasing timestamp order.
§Example
Consider a table with timestamp column of type `TIMESTAMP` and lateness attribute
`INTERVAL 1 DAY`. Assuming that the oldest timestamp in the table is
`2024-01-01T00:00:00`, the connector will fetch all records with timestamps
from `2024-01-01`, then all records for `2024-01-02`, `2024-01-03`, etc., until all records
in the table have been ingested.
§Requirements
- The timestamp column must be of a supported type: integer, `DATE`, or `TIMESTAMP`.
- The timestamp column must be declared with non-zero `LATENESS`.
- For efficient ingest, the table must be optimized for timestamp-based queries using partitioning, Z-ordering, or liquid clustering.
extra: HashMap<String, String>
Implementations§
Source§impl IcebergReaderConfig
impl IcebergReaderConfig
pub fn builder() -> IcebergReaderConfig
Trait Implementations§
Source§impl Clone for IcebergReaderConfig
impl Clone for IcebergReaderConfig
Source§fn clone(&self) -> IcebergReaderConfig
fn clone(&self) -> IcebergReaderConfig
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
source
. Read more