Skip to main content

lance_graph_catalog/
catalog_provider.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

//! Catalog provider trait and data types for external catalog integration.
//!
//! Inspired by Presto's `ConnectorMetadata` SPI, this module defines the
//! abstract interface for browsing external catalogs (Unity Catalog, Hive
//! Metastore, AWS Glue, etc.).
9
10use std::collections::HashMap;
11
12use arrow_schema::SchemaRef;
13use async_trait::async_trait;
14
/// Metadata about a catalog (top-level namespace).
///
/// This is a plain data carrier: every field is public and the type itself
/// enforces no invariants.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CatalogInfo {
    /// Name of the catalog.
    pub name: String,
    /// Optional free-form description of the catalog.
    pub comment: Option<String>,
    /// String key/value properties attached to the catalog.
    pub properties: HashMap<String, String>,
    /// Creation timestamp, if known.
    ///
    /// NOTE(review): the unit and epoch of this integer are not defined in
    /// this module — presumably epoch milliseconds as reported by the
    /// backend; confirm against the provider implementation.
    pub created_at: Option<i64>,
    /// Last-update timestamp, if known.
    ///
    /// NOTE(review): unit and epoch are not defined in this module —
    /// presumably the same convention as `created_at`; confirm.
    pub updated_at: Option<i64>,
}
24
/// Metadata about a schema (second-level namespace within a catalog).
///
/// This is a plain data carrier: every field is public and the type itself
/// enforces no invariants.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SchemaInfo {
    /// Name of the schema.
    pub name: String,
    /// Name of the catalog that contains this schema.
    pub catalog_name: String,
    /// Optional free-form description of the schema.
    pub comment: Option<String>,
    /// String key/value properties attached to the schema.
    pub properties: HashMap<String, String>,
    /// Creation timestamp, if known.
    ///
    /// NOTE(review): the unit and epoch of this integer are not defined in
    /// this module — presumably epoch milliseconds as reported by the
    /// backend; confirm against the provider implementation.
    pub created_at: Option<i64>,
    /// Last-update timestamp, if known.
    ///
    /// NOTE(review): unit and epoch are not defined in this module —
    /// presumably the same convention as `created_at`; confirm.
    pub updated_at: Option<i64>,
}
35
/// Metadata about a column in a table.
///
/// This is a plain data carrier: every field is public and the type itself
/// enforces no invariants (for example, `position` is not range-checked).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ColumnInfo {
    /// Name of the column.
    pub name: String,
    /// Human-readable type string (e.g., "INT", "VARCHAR(255)").
    pub type_text: String,
    /// Canonical type name from the catalog (e.g., "INT", "STRING").
    pub type_name: String,
    /// Column position (0-based).
    pub position: i32,
    /// Whether the column is declared nullable.
    pub nullable: bool,
    /// Optional free-form description of the column.
    pub comment: Option<String>,
}
49
/// Data format of the underlying storage.
///
/// Derives `Hash` in addition to equality so a format can be used as a key
/// in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum DataSourceFormat {
    /// Delta format (presumably Delta Lake tables — confirm with the backend).
    Delta,
    /// Parquet format.
    Parquet,
    /// CSV format.
    Csv,
    /// JSON format.
    Json,
    /// Avro format.
    Avro,
    /// ORC format.
    Orc,
    /// Text format.
    Text,
    /// Any format without a dedicated variant. The payload is a free-form
    /// string (presumably the format name as reported by the catalog —
    /// confirm against the code that constructs this variant).
    Other(String),
}
62
/// Type of table (managed vs external).
///
/// Derives `Hash` in addition to equality so a table type can be used as a
/// key in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TableType {
    /// Managed table (presumably the catalog owns the underlying storage —
    /// confirm against the backend's semantics).
    Managed,
    /// External table (presumably the storage lives outside the catalog's
    /// control — confirm against the backend's semantics).
    External,
}
69
70/// Full table metadata including columns and storage information.
71#[derive(Debug, Clone)]
72pub struct TableInfo {
73    pub name: String,
74    pub catalog_name: String,
75    pub schema_name: String,
76    pub table_type: TableType,
77    pub data_source_format: DataSourceFormat,
78    pub columns: Vec<ColumnInfo>,
79    pub storage_location: Option<String>,
80    pub comment: Option<String>,
81    pub properties: HashMap<String, String>,
82    pub created_at: Option<i64>,
83    pub updated_at: Option<i64>,
84}
85
/// Errors that can occur during catalog operations.
///
/// Every variant carries a free-form message string. The type derives
/// `Clone`, `PartialEq` and `Eq` so errors can be duplicated and compared
/// directly (for example with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CatalogError {
    /// Network or HTTP error.
    ConnectionError(String),
    /// Resource not found (catalog, schema, or table).
    NotFound(String),
    /// Authentication or authorization failure.
    AuthError(String),
    /// Invalid or unparsable response from the catalog server.
    InvalidResponse(String),
    /// Failed to map a catalog type to an Arrow type.
    TypeMappingError(String),
    /// Other errors.
    Other(String),
}
102
103impl std::fmt::Display for CatalogError {
104    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
105        match self {
106            Self::ConnectionError(msg) => write!(f, "Catalog connection error: {}", msg),
107            Self::NotFound(msg) => write!(f, "Not found: {}", msg),
108            Self::AuthError(msg) => write!(f, "Auth error: {}", msg),
109            Self::InvalidResponse(msg) => write!(f, "Invalid response: {}", msg),
110            Self::TypeMappingError(msg) => write!(f, "Type mapping error: {}", msg),
111            Self::Other(msg) => write!(f, "Catalog error: {}", msg),
112        }
113    }
114}
115
116impl std::error::Error for CatalogError {}
117
118pub type CatalogResult<T> = std::result::Result<T, CatalogError>;
119
120/// Abstract trait for browsing an external catalog.
121///
122/// Analogous to Presto's `ConnectorMetadata`. Implementations provide access
123/// to catalog metadata (catalogs, schemas, tables, columns) without being
124/// coupled to any specific data format or storage backend.
125///
126/// # Extensibility
127///
128/// Implement this trait to add support for new catalog backends:
129/// - Unity Catalog (provided)
130/// - Hive Metastore (future)
131/// - AWS Glue (future)
132/// - Iceberg REST Catalog (future)
133#[async_trait]
134pub trait CatalogProvider: Send + Sync {
135    /// Human-readable name of this catalog provider (e.g., "unity-catalog").
136    fn name(&self) -> &str;
137
138    /// List all catalogs available in this provider.
139    async fn list_catalogs(&self) -> CatalogResult<Vec<CatalogInfo>>;
140
141    /// Get information about a specific catalog.
142    async fn get_catalog(&self, name: &str) -> CatalogResult<CatalogInfo>;
143
144    /// List all schemas within a catalog.
145    async fn list_schemas(&self, catalog_name: &str) -> CatalogResult<Vec<SchemaInfo>>;
146
147    /// Get information about a specific schema.
148    async fn get_schema(&self, catalog_name: &str, schema_name: &str) -> CatalogResult<SchemaInfo>;
149
150    /// List all tables within a schema.
151    async fn list_tables(
152        &self,
153        catalog_name: &str,
154        schema_name: &str,
155    ) -> CatalogResult<Vec<TableInfo>>;
156
157    /// Get detailed information about a specific table, including columns.
158    async fn get_table(
159        &self,
160        catalog_name: &str,
161        schema_name: &str,
162        table_name: &str,
163    ) -> CatalogResult<TableInfo>;
164
165    /// Convert a table's column definitions to an Arrow schema.
166    ///
167    /// The default implementation uses the standard type mapping from
168    /// [`crate::type_mapping::columns_to_arrow_schema`].
169    fn table_to_arrow_schema(&self, table: &TableInfo) -> CatalogResult<SchemaRef> {
170        crate::type_mapping::columns_to_arrow_schema(&table.columns)
171    }
172}