datafusion_python/
table.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::pyarrow::ToPyArrow;
19use datafusion::datasource::{TableProvider, TableType};
20use pyo3::prelude::*;
21use std::sync::Arc;
22
23use crate::dataframe::PyDataFrame;
24use crate::dataset::Dataset;
25use crate::utils::table_provider_from_pycapsule;
26
27/// This struct is used as a common method for all TableProviders,
28/// whether they refer to an FFI provider, an internally known
29/// implementation, a dataset, or a dataframe view.
30#[pyclass(frozen, name = "RawTable", module = "datafusion.catalog", subclass)]
31#[derive(Clone)]
32pub struct PyTable {
33    pub table: Arc<dyn TableProvider>,
34}
35
36impl PyTable {
37    pub fn table(&self) -> Arc<dyn TableProvider> {
38        self.table.clone()
39    }
40}
41
42#[pymethods]
43impl PyTable {
44    /// Instantiate from any Python object that supports any of the table
45    /// types. We do not know a priori when using this method if the object
46    /// will be passed a wrapped or raw class. Here we handle all of the
47    /// following object types:
48    ///
49    /// - PyTable (essentially a clone operation), but either raw or wrapped
50    /// - DataFrame, either raw or wrapped
51    /// - FFI Table Providers via PyCapsule
52    /// - PyArrow Dataset objects
53    #[new]
54    pub fn new(obj: &Bound<'_, PyAny>) -> PyResult<Self> {
55        if let Ok(py_table) = obj.extract::<PyTable>() {
56            Ok(py_table)
57        } else if let Ok(py_table) = obj
58            .getattr("_inner")
59            .and_then(|inner| inner.extract::<PyTable>())
60        {
61            Ok(py_table)
62        } else if let Ok(py_df) = obj.extract::<PyDataFrame>() {
63            let provider = py_df.inner_df().as_ref().clone().into_view();
64            Ok(PyTable::from(provider))
65        } else if let Ok(py_df) = obj
66            .getattr("df")
67            .and_then(|inner| inner.extract::<PyDataFrame>())
68        {
69            let provider = py_df.inner_df().as_ref().clone().into_view();
70            Ok(PyTable::from(provider))
71        } else if let Some(provider) = table_provider_from_pycapsule(obj)? {
72            Ok(PyTable::from(provider))
73        } else {
74            let py = obj.py();
75            let provider = Arc::new(Dataset::new(obj, py)?) as Arc<dyn TableProvider>;
76            Ok(PyTable::from(provider))
77        }
78    }
79
80    /// Get a reference to the schema for this table
81    #[getter]
82    fn schema(&self, py: Python) -> PyResult<PyObject> {
83        self.table.schema().to_pyarrow(py)
84    }
85
86    /// Get the type of this table for metadata/catalog purposes.
87    #[getter]
88    fn kind(&self) -> &str {
89        match self.table.table_type() {
90            TableType::Base => "physical",
91            TableType::View => "view",
92            TableType::Temporary => "temporary",
93        }
94    }
95
96    fn __repr__(&self) -> PyResult<String> {
97        let kind = self.kind();
98        Ok(format!("Table(kind={kind})"))
99    }
100}
101
102impl From<Arc<dyn TableProvider>> for PyTable {
103    fn from(table: Arc<dyn TableProvider>) -> Self {
104        Self { table }
105    }
106}