Skip to main content

adbc_core/
sync.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::collections::HashSet;
19
20use arrow_array::{RecordBatch, RecordBatchReader};
21use arrow_schema::Schema;
22
23use crate::error::Result;
24use crate::options::{self, OptionConnection, OptionDatabase, OptionStatement, OptionValue};
25use crate::PartitionedResult;
26
27/// Ability to configure an object by setting/getting options.
28pub trait Optionable {
29    type Option: AsRef<str>;
30
31    /// Set a post-init option.
32    fn set_option(&mut self, key: Self::Option, value: OptionValue) -> Result<()>;
33
34    /// Get a string option value by key.
35    fn get_option_string(&self, key: Self::Option) -> Result<String>;
36
37    /// Get a bytes option value by key.
38    fn get_option_bytes(&self, key: Self::Option) -> Result<Vec<u8>>;
39
40    /// Get an integer option value by key.
41    fn get_option_int(&self, key: Self::Option) -> Result<i64>;
42
43    /// Get a float option value by key.
44    fn get_option_double(&self, key: Self::Option) -> Result<f64>;
45}
46
47/// A handle to an ADBC driver.
48pub trait Driver {
49    type DatabaseType: Database;
50
51    /// Allocate and initialize a new database without pre-init options.
52    fn new_database(&mut self) -> Result<Self::DatabaseType>;
53
54    /// Allocate and initialize a new database with pre-init options.
55    fn new_database_with_opts(
56        &mut self,
57        opts: impl IntoIterator<Item = (OptionDatabase, OptionValue)>,
58    ) -> Result<Self::DatabaseType>;
59}
60
61/// A handle to an ADBC database.
62///
63/// Databases hold state shared by multiple connections. This typically means
64/// configuration and caches. For in-memory databases, it provides a place to
65/// hold ownership of the in-memory database.
66///
67/// Databases must be kept alive as long as any connections exist.
68pub trait Database: Optionable<Option = OptionDatabase> {
69    type ConnectionType: Connection;
70
71    /// Allocate and initialize a new connection without pre-init options.
72    fn new_connection(&self) -> Result<Self::ConnectionType>;
73
74    /// Allocate and initialize a new connection with pre-init options.
75    fn new_connection_with_opts(
76        &self,
77        opts: impl IntoIterator<Item = (options::OptionConnection, OptionValue)>,
78    ) -> Result<Self::ConnectionType>;
79}
80
81/// A handle to an ADBC connection.
82///
83/// Connections provide methods for query execution, managing prepared
84/// statements, using transactions, and so on.
85///
86/// # Autocommit
87///
88/// Connections should start in autocommit mode. They can be moved out by
89/// setting [options::OptionConnection::AutoCommit] to "false". Turning off
90/// autocommit allows customizing the isolation level.
91pub trait Connection: Optionable<Option = OptionConnection> {
92    type StatementType: Statement;
93
94    /// Allocate and initialize a new statement.
95    fn new_statement(&mut self) -> Result<Self::StatementType>;
96
97    /// Cancel the in-progress operation on a connection.
98    fn cancel(&mut self) -> Result<()>;
99
100    /// Get metadata about the database/driver.
101    ///
102    /// # Arguments
103    ///
104    /// - `codes` - Requested metadata. If `None`, retrieve all available metadata.
105    ///
106    /// # Result
107    ///
108    /// The result is an Arrow dataset with the following schema:
109    ///
110    /// Field Name                  | Field Type
111    /// ----------------------------|------------------------
112    /// info_name                   | uint32 not null
113    /// info_value                  | INFO_SCHEMA
114    ///
115    /// INFO_SCHEMA is a dense union with members:
116    ///
117    /// Field Name (Type Code)      | Field Type
118    /// ----------------------------|------------------------
119    /// string_value (0)            | utf8
120    /// bool_value (1)              | bool
121    /// int64_value (2)             | int64
122    /// int32_bitmask (3)           | int32
123    /// string_list (4)             | list\<utf8\>
124    /// int32_to_int32_list_map (5) | map\<int32, list\<int32\>\>
125    fn get_info(
126        &self,
127        codes: Option<HashSet<options::InfoCode>>,
128    ) -> Result<Box<dyn RecordBatchReader + Send + 'static>>;
129
130    /// Get a hierarchical view of all catalogs, database schemas, tables, and
131    /// columns.
132    ///
133    /// # Arguments
134    ///
135    /// - `depth` - The level of nesting to query.
136    /// - `catalog` - Only show tables in the given catalog. If `None`,
137    ///   do not filter by catalog. If an empty string, only show tables
138    ///   without a catalog.  May be a search pattern.
139    /// - `db_schema` - Only show tables in the given database schema. If
140    ///   `None`, do not filter by database schema. If an empty string, only show
141    ///   tables without a database schema. May be a search pattern.
142    /// - `table_name` - Only show tables with the given name. If `None`, do not
143    ///   filter by name. May be a search pattern.
144    /// - `table_type` - Only show tables matching one of the given table
145    ///   types. If `None`, show tables of any type. Valid table types can be fetched
146    ///   from [Connection::get_table_types].
147    /// - `column_name` - Only show columns with the given name. If
148    ///   `None`, do not filter by name.  May be a search pattern..
149    ///
150    /// # Result
151    ///
152    /// The result is an Arrow dataset with the following schema:
153    ///
154    /// | Field Name               | Field Type               |
155    /// |--------------------------|--------------------------|
156    /// | catalog_name             | utf8                     |
157    /// | catalog_db_schemas       | list\<DB_SCHEMA_SCHEMA\> |
158    ///
159    /// DB_SCHEMA_SCHEMA is a Struct with fields:
160    ///
161    /// | Field Name               | Field Type              |
162    /// |--------------------------|-------------------------|
163    /// | db_schema_name           | utf8                    |
164    /// | db_schema_tables         | list\<TABLE_SCHEMA\>    |
165    ///
166    /// TABLE_SCHEMA is a Struct with fields:
167    ///
168    /// | Field Name               | Field Type                |
169    /// |--------------------------|---------------------------|
170    /// | table_name               | utf8 not null             |
171    /// | table_type               | utf8 not null             |
172    /// | table_columns            | list\<COLUMN_SCHEMA\>     |
173    /// | table_constraints        | list\<CONSTRAINT_SCHEMA\> |
174    ///
175    /// COLUMN_SCHEMA is a Struct with fields:
176    ///
177    /// | Field Name               | Field Type              | Comments |
178    /// |--------------------------|-------------------------|----------|
179    /// | column_name              | utf8 not null           |          |
180    /// | ordinal_position         | int32                   | (1)      |
181    /// | remarks                  | utf8                    | (2)      |
182    /// | xdbc_data_type           | int16                   | (3)      |
183    /// | xdbc_type_name           | utf8                    | (3)      |
184    /// | xdbc_column_size         | int32                   | (3)      |
185    /// | xdbc_decimal_digits      | int16                   | (3)      |
186    /// | xdbc_num_prec_radix      | int16                   | (3)      |
187    /// | xdbc_nullable            | int16                   | (3)      |
188    /// | xdbc_column_def          | utf8                    | (3)      |
189    /// | xdbc_sql_data_type       | int16                   | (3)      |
190    /// | xdbc_datetime_sub        | int16                   | (3)      |
191    /// | xdbc_char_octet_length   | int32                   | (3)      |
192    /// | xdbc_is_nullable         | utf8                    | (3)      |
193    /// | xdbc_scope_catalog       | utf8                    | (3)      |
194    /// | xdbc_scope_schema        | utf8                    | (3)      |
195    /// | xdbc_scope_table         | utf8                    | (3)      |
196    /// | xdbc_is_autoincrement    | bool                    | (3)      |
197    /// | xdbc_is_generatedcolumn  | bool                    | (3)      |
198    ///
199    /// 1. The column's ordinal position in the table (starting from 1).
200    /// 2. Database-specific description of the column.
201    /// 3. Optional value.  Should be null if not supported by the driver.
202    ///    `xdbc_` values are meant to provide JDBC/ODBC-compatible metadata
203    ///    in an agnostic manner.
204    ///
205    /// CONSTRAINT_SCHEMA is a Struct with fields:
206    ///
207    /// | Field Name               | Field Type              | Comments |
208    /// |--------------------------|-------------------------|----------|
209    /// | constraint_name          | utf8                    |          |
210    /// | constraint_type          | utf8 not null           | (1)      |
211    /// | constraint_column_names  | list\<utf8\> not null     | (2)      |
212    /// | constraint_column_usage  | list\<USAGE_SCHEMA\>      | (3)      |
213    ///
214    /// 1. One of `CHECK`, `FOREIGN KEY`, `PRIMARY KEY`, or `UNIQUE`.
215    /// 2. The columns on the current table that are constrained, in
216    ///    order.
217    /// 3. For `FOREIGN KEY` only, the referenced table and columns.
218    ///
219    /// USAGE_SCHEMA is a Struct with fields:
220    ///
221    /// | Field Name               | Field Type              |
222    /// |--------------------------|-------------------------|
223    /// | fk_catalog               | utf8                    |
224    /// | fk_db_schema             | utf8                    |
225    /// | fk_table                 | utf8 not null           |
226    /// | fk_column_name           | utf8 not null           |
227    ///
228    fn get_objects(
229        &self,
230        depth: options::ObjectDepth,
231        catalog: Option<&str>,
232        db_schema: Option<&str>,
233        table_name: Option<&str>,
234        table_type: Option<Vec<&str>>,
235        column_name: Option<&str>,
236    ) -> Result<Box<dyn RecordBatchReader + Send + 'static>>;
237
238    /// Get the Arrow schema of a table.
239    ///
240    /// # Arguments
241    ///
242    /// - `catalog` - The catalog (or `None` if not applicable).
243    /// - `db_schema` - The database schema (or `None` if not applicable).
244    /// - `table_name` - The table name.
245    fn get_table_schema(
246        &self,
247        catalog: Option<&str>,
248        db_schema: Option<&str>,
249        table_name: &str,
250    ) -> Result<Schema>;
251
252    /// Get a list of table types in the database.
253    ///
254    /// # Result
255    ///
256    /// The result is an Arrow dataset with the following schema:
257    ///
258    /// Field Name     | Field Type
259    /// ---------------|--------------
260    /// table_type     | utf8 not null
261    fn get_table_types(&self) -> Result<Box<dyn RecordBatchReader + Send + 'static>>;
262
263    /// Get the names of statistics specific to this driver.
264    ///
265    /// # Result
266    ///
267    /// The result is an Arrow dataset with the following schema:
268    ///
269    /// Field Name     | Field Type
270    /// ---------------|----------------
271    /// statistic_name | utf8 not null
272    /// statistic_key  | int16 not null
273    ///
274    /// # Since
275    /// ADBC API revision 1.1.0
276    fn get_statistic_names(&self) -> Result<Box<dyn RecordBatchReader + Send + 'static>>;
277
278    /// Get statistics about the data distribution of table(s).
279    ///
280    /// # Arguments
281    ///
282    /// - `catalog` - The catalog (or `None` if not applicable). May be a search pattern.
283    /// - `db_schema` - The database schema (or `None` if not applicable). May be a search pattern
284    /// - `table_name` - The table name (or `None` if not applicable). May be a search pattern
285    /// - `approximate` - If false, request exact values of statistics, else
286    ///   allow for best-effort, approximate, or cached values. The database may
287    ///   return approximate values regardless, as indicated in the result.
288    ///   Requesting exact values may be expensive or unsupported.
289    ///
290    /// # Result
291    ///
292    /// The result is an Arrow dataset with the following schema:
293    ///
294    /// | Field Name               | Field Type                       |
295    /// |--------------------------|----------------------------------|
296    /// | catalog_name             | utf8                             |
297    /// | catalog_db_schemas       | list\<DB_SCHEMA_SCHEMA\> not null|
298    ///
299    /// DB_SCHEMA_SCHEMA is a Struct with fields:
300    ///
301    /// | Field Name               | Field Type                        |
302    /// |--------------------------|-----------------------------------|
303    /// | db_schema_name           | utf8                              |
304    /// | db_schema_statistics     | list\<STATISTICS_SCHEMA\> not null|
305    ///
306    /// STATISTICS_SCHEMA is a Struct with fields:
307    ///
308    /// | Field Name               | Field Type                       | Comments |
309    /// |--------------------------|----------------------------------| -------- |
310    /// | table_name               | utf8 not null                    |          |
311    /// | column_name              | utf8                             | (1)      |
312    /// | statistic_key            | int16 not null                   | (2)      |
313    /// | statistic_value          | VALUE_SCHEMA not null            |          |
314    /// | statistic_is_approximate | bool not null                    | (3)      |
315    ///
316    /// 1. If null, then the statistic applies to the entire table.
317    /// 2. A dictionary-encoded statistic name (although we do not use the Arrow
318    ///    dictionary type). Values in [0, 1024) are reserved for ADBC.  Other
319    ///    values are for implementation-specific statistics.  For the definitions
320    ///    of predefined statistic types, see [options::Statistics]. To get
321    ///    driver-specific statistic names, use [Connection::get_statistic_names].
322    /// 3. If true, then the value is approximate or best-effort.
323    ///
324    /// VALUE_SCHEMA is a dense union with members:
325    ///
326    /// | Field Name               | Field Type                       |
327    /// |--------------------------|----------------------------------|
328    /// | int64                    | int64                            |
329    /// | uint64                   | uint64                           |
330    /// | float64                  | float64                          |
331    /// | binary                   | binary                           |
332    ///
333    /// # Since
334    ///
335    /// ADBC API revision 1.1.0
336    fn get_statistics(
337        &self,
338        catalog: Option<&str>,
339        db_schema: Option<&str>,
340        table_name: Option<&str>,
341        approximate: bool,
342    ) -> Result<Box<dyn RecordBatchReader + Send + 'static>>;
343
344    /// Commit any pending transactions. Only used if autocommit is disabled.
345    ///
346    /// Behavior is undefined if this is mixed with SQL transaction statements.
347    fn commit(&mut self) -> Result<()>;
348
349    /// Roll back any pending transactions. Only used if autocommit is disabled.
350    ///
351    /// Behavior is undefined if this is mixed with SQL transaction statements.
352    fn rollback(&mut self) -> Result<()>;
353
354    /// Retrieve a given partition of data.
355    ///
356    /// A partition can be retrieved from [Statement::execute_partitions].
357    ///
358    /// # Arguments
359    ///
360    /// - `partition` - The partition descriptor.
361    fn read_partition(
362        &self,
363        partition: impl AsRef<[u8]>,
364    ) -> Result<Box<dyn RecordBatchReader + Send + 'static>>;
365}
366
367/// A handle to an ADBC statement.
368///
369/// A statement is a container for all state needed to execute a database query,
370/// such as the query itself, parameters for prepared statements, driver
371/// parameters, etc.
372///
373/// Statements may represent queries or prepared statements.
374///
375/// Statements may be used multiple times and can be reconfigured
376/// (e.g. they can be reused to execute multiple different queries).
377/// However, executing a statement (and changing certain other state)
378/// will invalidate result sets obtained prior to that execution.
379///
380/// Multiple statements may be created from a single connection.
381/// However, the driver may block or error if they are used concurrently
382/// (whether from a single thread or multiple threads).
383pub trait Statement: Optionable<Option = OptionStatement> {
384    /// Bind Arrow data. This can be used for bulk inserts or prepared
385    /// statements.
386    fn bind(&mut self, batch: RecordBatch) -> Result<()>;
387
388    /// Bind Arrow data. This can be used for bulk inserts or prepared
389    /// statements.
390    // TODO(alexandreyc): should we use a generic here instead of a trait object?
391    // See: https://github.com/apache/arrow-adbc/pull/1725#discussion_r1567750972
392    fn bind_stream(&mut self, reader: Box<dyn RecordBatchReader + Send>) -> Result<()>;
393
394    /// Execute a statement and get the results.
395    ///
396    /// This invalidates any prior result sets.
397    fn execute(&mut self) -> Result<Box<dyn RecordBatchReader + Send + 'static>>;
398
399    /// Execute a statement that doesn’t have a result set and get the number
400    /// of affected rows.
401    ///
402    /// This invalidates any prior result sets.
403    ///
404    /// # Result
405    ///
406    /// Will return the number of rows affected. If the affected row count is
407    /// unknown or unsupported by the database, will return `None`.
408    fn execute_update(&mut self) -> Result<Option<i64>>;
409
410    /// Get the schema of the result set of a query without executing it.
411    ///
412    /// This invalidates any prior result sets.
413    ///
414    /// Depending on the driver, this may require first executing
415    /// [Statement::prepare].
416    ///
417    /// # Since
418    ///
419    /// ADBC API revision 1.1.0
420    fn execute_schema(&mut self) -> Result<Schema>;
421
422    /// Execute a statement and get the results as a partitioned result set.
423    fn execute_partitions(&mut self) -> Result<PartitionedResult>;
424
425    /// Get the schema for bound parameters.
426    ///
427    /// This retrieves an Arrow schema describing the number, names, and
428    /// types of the parameters in a parameterized statement. The fields
429    /// of the schema should be in order of the ordinal position of the
430    /// parameters; named parameters should appear only once.
431    ///
432    /// If the parameter does not have a name, or the name cannot be
433    /// determined, the name of the corresponding field in the schema will
434    /// be an empty string. If the type cannot be determined, the type of
435    /// the corresponding field will be NA (NullType).
436    ///
437    /// This should be called after [Statement::prepare].
438    fn get_parameter_schema(&self) -> Result<Schema>;
439
440    /// Turn this statement into a prepared statement to be executed multiple
441    /// times.
442    ///
443    /// This invalidates any prior result sets.
444    fn prepare(&mut self) -> Result<()>;
445
446    /// Set the SQL query to execute.
447    ///
448    /// The query can then be executed with [Statement::execute]. For queries
449    /// expected to be executed repeatedly, call [Statement::prepare] first.
450    fn set_sql_query(&mut self, query: impl AsRef<str>) -> Result<()>;
451
452    /// Set the Substrait plan to execute.
453    ///
454    /// The query can then be executed with [Statement::execute]. For queries
455    /// expected to be executed repeatedly, call [Statement::prepare] first.
456    fn set_substrait_plan(&mut self, plan: impl AsRef<[u8]>) -> Result<()>;
457
458    /// Cancel execution of an in-progress query.
459    ///
460    /// This can be called during [Statement::execute] (or similar), or while
461    /// consuming a result set returned from such.
462    ///
463    /// # Since
464    ///
465    /// ADBC API revision 1.1.0
466    fn cancel(&mut self) -> Result<()>;
467}