Skip to main content

lance_graph_catalog/
table_reader.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Table reader trait for format-specific data reading.
5//!
6//! Inspired by Presto's `ConnectorPageSourceProvider`, this trait decouples
7//! data format reading from catalog metadata. Each implementation handles
8//! one or more data formats and is reusable across any [`CatalogProvider`].
9
10use std::collections::HashMap;
11
12use arrow_schema::SchemaRef;
13use async_trait::async_trait;
14use datafusion::execution::context::SessionContext;
15
16use crate::catalog_provider::{CatalogResult, DataSourceFormat, TableInfo};
17
18/// Reads table data in a specific format and registers it into a DataFusion
19/// `SessionContext`.
20///
21/// Analogous to Presto's `ConnectorPageSourceProvider` — decoupled from
22/// catalog metadata so that format readers are reusable across any catalog.
///
/// Implementors must be `Send + Sync` (these are supertrait bounds of this
/// trait). The asynchronous method is declared through the `#[async_trait]`
/// attribute, so implementations need the same attribute on their `impl` block.
23///
24/// # Extensibility
25///
26/// Implement this trait to add support for new data formats:
27/// - Parquet (provided)
28/// - Delta Lake (provided, behind `delta` feature)
29/// - CSV (future)
30/// - Iceberg (future)
31/// - ORC (future)
32#[async_trait]
33pub trait TableReader: Send + Sync {
34    /// Human-readable name of this reader (e.g., "parquet", "delta").
    ///
    /// The returned string slice is borrowed from `self`.
35    fn name(&self) -> &str;
36
37    /// The data format(s) this reader can handle.
    ///
    /// Returned as a slice borrowed from `self`, so one reader may advertise
    /// several formats.
    ///
    /// NOTE(review): presumably callers match a table's format against this
    /// list to choose a reader — confirm against the dispatching code.
38    fn supported_formats(&self) -> &[DataSourceFormat];
39
40    /// Register a table into a DataFusion `SessionContext` using its storage
41    /// location.
42    ///
43    /// The reader should read (or reference) the data at `table_info.storage_location`
44    /// and register it as a DataFusion `TableProvider` so it can be queried via SQL.
45    ///
46    /// # Arguments
47    ///
48    /// * `ctx` - The DataFusion session context to register the table in.
49    /// * `table_name` - The name to register the table under (already lowercased).
50    /// * `table_info` - Full table metadata from the catalog, including `storage_location`.
51    /// * `schema` - Arrow schema derived from the table's column definitions.
52    /// * `storage_options` - Key-value pairs for cloud storage credentials
53    ///   (e.g., `azure_storage_account_name`, `aws_access_key_id`, etc.).
    ///
    /// # Errors
    ///
    /// Failures are reported through the returned [`CatalogResult`]; on success
    /// the result carries no value (`()`). This trait does not fix the specific
    /// failure conditions — each implementation defines its own.
    ///
    /// NOTE(review): behaviour when `table_name` is already registered in `ctx`
    /// is not specified here — confirm per implementation.
54    async fn register_table(
55        &self,
56        ctx: &SessionContext,
57        table_name: &str,
58        table_info: &TableInfo,
59        schema: SchemaRef,
60        storage_options: &HashMap<String, String>,
61    ) -> CatalogResult<()>;
62}