// adbc_core/sync.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::collections::HashSet;
19
20use arrow_array::{RecordBatch, RecordBatchReader};
21use arrow_schema::Schema;
22
23use crate::error::Result;
24use crate::options::{self, OptionConnection, OptionDatabase, OptionStatement, OptionValue};
25use crate::PartitionedResult;
26
27/// Ability to configure an object by setting/getting options.
/// Ability to configure an object by setting/getting options.
pub trait Optionable {
    /// The option-key type accepted by this object (database, connection or
    /// statement option keys); must be convertible to a string key.
    type Option: AsRef<str>;

    /// Set a post-init option.
    fn set_option(&mut self, key: Self::Option, value: OptionValue) -> Result<()>;

    /// Get a string option value by key.
    fn get_option_string(&self, key: Self::Option) -> Result<String>;

    /// Get a bytes option value by key.
    fn get_option_bytes(&self, key: Self::Option) -> Result<Vec<u8>>;

    /// Get an integer option value by key.
    fn get_option_int(&self, key: Self::Option) -> Result<i64>;

    /// Get a float option value by key.
    fn get_option_double(&self, key: Self::Option) -> Result<f64>;
}
46
47/// A handle to an ADBC driver.
48pub trait Driver {
49 type DatabaseType: Database;
50
51 /// Allocate and initialize a new database without pre-init options.
52 fn new_database(&mut self) -> Result<Self::DatabaseType>;
53
54 /// Allocate and initialize a new database with pre-init options.
55 fn new_database_with_opts(
56 &mut self,
57 opts: impl IntoIterator<Item = (OptionDatabase, OptionValue)>,
58 ) -> Result<Self::DatabaseType>;
59}
60
61/// A handle to an ADBC database.
62///
63/// Databases hold state shared by multiple connections. This typically means
64/// configuration and caches. For in-memory databases, it provides a place to
65/// hold ownership of the in-memory database.
66///
67/// Databases must be kept alive as long as any connections exist.
68pub trait Database: Optionable<Option = OptionDatabase> {
69 type ConnectionType: Connection;
70
71 /// Allocate and initialize a new connection without pre-init options.
72 fn new_connection(&self) -> Result<Self::ConnectionType>;
73
74 /// Allocate and initialize a new connection with pre-init options.
75 fn new_connection_with_opts(
76 &self,
77 opts: impl IntoIterator<Item = (options::OptionConnection, OptionValue)>,
78 ) -> Result<Self::ConnectionType>;
79}
80
81/// A handle to an ADBC connection.
82///
83/// Connections provide methods for query execution, managing prepared
84/// statements, using transactions, and so on.
85///
86/// # Autocommit
87///
88/// Connections should start in autocommit mode. They can be moved out by
89/// setting [options::OptionConnection::AutoCommit] to "false". Turning off
90/// autocommit allows customizing the isolation level.
pub trait Connection: Optionable<Option = OptionConnection> {
    /// The concrete statement type produced by this connection.
    type StatementType: Statement;

    /// Allocate and initialize a new statement.
    fn new_statement(&mut self) -> Result<Self::StatementType>;

    /// Cancel the in-progress operation on a connection.
    fn cancel(&mut self) -> Result<()>;

    /// Get metadata about the database/driver.
    ///
    /// # Arguments
    ///
    /// - `codes` - Requested metadata. If `None`, retrieve all available metadata.
    ///
    /// # Result
    ///
    /// The result is an Arrow dataset with the following schema:
    ///
    /// Field Name                  | Field Type
    /// ----------------------------|------------------------
    /// info_name                   | uint32 not null
    /// info_value                  | INFO_SCHEMA
    ///
    /// INFO_SCHEMA is a dense union with members:
    ///
    /// Field Name (Type Code)      | Field Type
    /// ----------------------------|------------------------
    /// string_value (0)            | utf8
    /// bool_value (1)              | bool
    /// int64_value (2)             | int64
    /// int32_bitmask (3)           | int32
    /// string_list (4)             | list\<utf8\>
    /// int32_to_int32_list_map (5) | map\<int32, list\<int32\>\>
    fn get_info(
        &self,
        codes: Option<HashSet<options::InfoCode>>,
    ) -> Result<Box<dyn RecordBatchReader + Send + 'static>>;

    /// Get a hierarchical view of all catalogs, database schemas, tables, and
    /// columns.
    ///
    /// # Arguments
    ///
    /// - `depth` - The level of nesting to query.
    /// - `catalog` - Only show tables in the given catalog. If `None`,
    ///   do not filter by catalog. If an empty string, only show tables
    ///   without a catalog. May be a search pattern.
    /// - `db_schema` - Only show tables in the given database schema. If
    ///   `None`, do not filter by database schema. If an empty string, only show
    ///   tables without a database schema. May be a search pattern.
    /// - `table_name` - Only show tables with the given name. If `None`, do not
    ///   filter by name. May be a search pattern.
    /// - `table_type` - Only show tables matching one of the given table
    ///   types. If `None`, show tables of any type. Valid table types can be fetched
    ///   from [Connection::get_table_types].
    /// - `column_name` - Only show columns with the given name. If
    ///   `None`, do not filter by name. May be a search pattern.
    ///
    /// # Result
    ///
    /// The result is an Arrow dataset with the following schema:
    ///
    /// | Field Name               | Field Type               |
    /// |--------------------------|--------------------------|
    /// | catalog_name             | utf8                     |
    /// | catalog_db_schemas       | list\<DB_SCHEMA_SCHEMA\> |
    ///
    /// DB_SCHEMA_SCHEMA is a Struct with fields:
    ///
    /// | Field Name               | Field Type              |
    /// |--------------------------|-------------------------|
    /// | db_schema_name           | utf8                    |
    /// | db_schema_tables         | list\<TABLE_SCHEMA\>    |
    ///
    /// TABLE_SCHEMA is a Struct with fields:
    ///
    /// | Field Name               | Field Type                |
    /// |--------------------------|---------------------------|
    /// | table_name               | utf8 not null             |
    /// | table_type               | utf8 not null             |
    /// | table_columns            | list\<COLUMN_SCHEMA\>     |
    /// | table_constraints        | list\<CONSTRAINT_SCHEMA\> |
    ///
    /// COLUMN_SCHEMA is a Struct with fields:
    ///
    /// | Field Name               | Field Type              | Comments |
    /// |--------------------------|-------------------------|----------|
    /// | column_name              | utf8 not null           |          |
    /// | ordinal_position         | int32                   | (1)      |
    /// | remarks                  | utf8                    | (2)      |
    /// | xdbc_data_type           | int16                   | (3)      |
    /// | xdbc_type_name           | utf8                    | (3)      |
    /// | xdbc_column_size         | int32                   | (3)      |
    /// | xdbc_decimal_digits      | int16                   | (3)      |
    /// | xdbc_num_prec_radix      | int16                   | (3)      |
    /// | xdbc_nullable            | int16                   | (3)      |
    /// | xdbc_column_def          | utf8                    | (3)      |
    /// | xdbc_sql_data_type       | int16                   | (3)      |
    /// | xdbc_datetime_sub        | int16                   | (3)      |
    /// | xdbc_char_octet_length   | int32                   | (3)      |
    /// | xdbc_is_nullable         | utf8                    | (3)      |
    /// | xdbc_scope_catalog       | utf8                    | (3)      |
    /// | xdbc_scope_schema        | utf8                    | (3)      |
    /// | xdbc_scope_table         | utf8                    | (3)      |
    /// | xdbc_is_autoincrement    | bool                    | (3)      |
    /// | xdbc_is_generatedcolumn  | bool                    | (3)      |
    ///
    /// 1. The column's ordinal position in the table (starting from 1).
    /// 2. Database-specific description of the column.
    /// 3. Optional value. Should be null if not supported by the driver.
    ///    `xdbc_` values are meant to provide JDBC/ODBC-compatible metadata
    ///    in an agnostic manner.
    ///
    /// CONSTRAINT_SCHEMA is a Struct with fields:
    ///
    /// | Field Name               | Field Type              | Comments |
    /// |--------------------------|-------------------------|----------|
    /// | constraint_name          | utf8                    |          |
    /// | constraint_type          | utf8 not null           | (1)      |
    /// | constraint_column_names  | list\<utf8\> not null   | (2)      |
    /// | constraint_column_usage  | list\<USAGE_SCHEMA\>    | (3)      |
    ///
    /// 1. One of `CHECK`, `FOREIGN KEY`, `PRIMARY KEY`, or `UNIQUE`.
    /// 2. The columns on the current table that are constrained, in
    ///    order.
    /// 3. For `FOREIGN KEY` only, the referenced table and columns.
    ///
    /// USAGE_SCHEMA is a Struct with fields:
    ///
    /// | Field Name               | Field Type              |
    /// |--------------------------|-------------------------|
    /// | fk_catalog               | utf8                    |
    /// | fk_db_schema             | utf8                    |
    /// | fk_table                 | utf8 not null           |
    /// | fk_column_name           | utf8 not null           |
    ///
    fn get_objects(
        &self,
        depth: options::ObjectDepth,
        catalog: Option<&str>,
        db_schema: Option<&str>,
        table_name: Option<&str>,
        table_type: Option<Vec<&str>>,
        column_name: Option<&str>,
    ) -> Result<Box<dyn RecordBatchReader + Send + 'static>>;

    /// Get the Arrow schema of a table.
    ///
    /// # Arguments
    ///
    /// - `catalog` - The catalog (or `None` if not applicable).
    /// - `db_schema` - The database schema (or `None` if not applicable).
    /// - `table_name` - The table name.
    fn get_table_schema(
        &self,
        catalog: Option<&str>,
        db_schema: Option<&str>,
        table_name: &str,
    ) -> Result<Schema>;

    /// Get a list of table types in the database.
    ///
    /// # Result
    ///
    /// The result is an Arrow dataset with the following schema:
    ///
    /// Field Name     | Field Type
    /// ---------------|--------------
    /// table_type     | utf8 not null
    fn get_table_types(&self) -> Result<Box<dyn RecordBatchReader + Send + 'static>>;

    /// Get the names of statistics specific to this driver.
    ///
    /// # Result
    ///
    /// The result is an Arrow dataset with the following schema:
    ///
    /// Field Name     | Field Type
    /// ---------------|----------------
    /// statistic_name | utf8 not null
    /// statistic_key  | int16 not null
    ///
    /// # Since
    /// ADBC API revision 1.1.0
    fn get_statistic_names(&self) -> Result<Box<dyn RecordBatchReader + Send + 'static>>;

    /// Get statistics about the data distribution of table(s).
    ///
    /// # Arguments
    ///
    /// - `catalog` - The catalog (or `None` if not applicable). May be a search pattern.
    /// - `db_schema` - The database schema (or `None` if not applicable). May be a search pattern.
    /// - `table_name` - The table name (or `None` if not applicable). May be a search pattern.
    /// - `approximate` - If false, request exact values of statistics, else
    ///   allow for best-effort, approximate, or cached values. The database may
    ///   return approximate values regardless, as indicated in the result.
    ///   Requesting exact values may be expensive or unsupported.
    ///
    /// # Result
    ///
    /// The result is an Arrow dataset with the following schema:
    ///
    /// | Field Name               | Field Type                       |
    /// |--------------------------|----------------------------------|
    /// | catalog_name             | utf8                             |
    /// | catalog_db_schemas       | list\<DB_SCHEMA_SCHEMA\> not null|
    ///
    /// DB_SCHEMA_SCHEMA is a Struct with fields:
    ///
    /// | Field Name               | Field Type                        |
    /// |--------------------------|-----------------------------------|
    /// | db_schema_name           | utf8                              |
    /// | db_schema_statistics     | list\<STATISTICS_SCHEMA\> not null|
    ///
    /// STATISTICS_SCHEMA is a Struct with fields:
    ///
    /// | Field Name               | Field Type                       | Comments |
    /// |--------------------------|----------------------------------| -------- |
    /// | table_name               | utf8 not null                    |          |
    /// | column_name              | utf8                             | (1)      |
    /// | statistic_key            | int16 not null                   | (2)      |
    /// | statistic_value          | VALUE_SCHEMA not null            |          |
    /// | statistic_is_approximate | bool not null                    | (3)      |
    ///
    /// 1. If null, then the statistic applies to the entire table.
    /// 2. A dictionary-encoded statistic name (although we do not use the Arrow
    ///    dictionary type). Values in [0, 1024) are reserved for ADBC. Other
    ///    values are for implementation-specific statistics. For the definitions
    ///    of predefined statistic types, see [options::Statistics]. To get
    ///    driver-specific statistic names, use [Connection::get_statistic_names].
    /// 3. If true, then the value is approximate or best-effort.
    ///
    /// VALUE_SCHEMA is a dense union with members:
    ///
    /// | Field Name               | Field Type                       |
    /// |--------------------------|----------------------------------|
    /// | int64                    | int64                            |
    /// | uint64                   | uint64                           |
    /// | float64                  | float64                          |
    /// | binary                   | binary                           |
    ///
    /// # Since
    ///
    /// ADBC API revision 1.1.0
    fn get_statistics(
        &self,
        catalog: Option<&str>,
        db_schema: Option<&str>,
        table_name: Option<&str>,
        approximate: bool,
    ) -> Result<Box<dyn RecordBatchReader + Send + 'static>>;

    /// Commit any pending transactions. Only used if autocommit is disabled.
    ///
    /// Behavior is undefined if this is mixed with SQL transaction statements.
    fn commit(&mut self) -> Result<()>;

    /// Roll back any pending transactions. Only used if autocommit is disabled.
    ///
    /// Behavior is undefined if this is mixed with SQL transaction statements.
    fn rollback(&mut self) -> Result<()>;

    /// Retrieve a given partition of data.
    ///
    /// A partition can be retrieved from [Statement::execute_partitions].
    ///
    /// # Arguments
    ///
    /// - `partition` - The partition descriptor.
    fn read_partition(
        &self,
        partition: impl AsRef<[u8]>,
    ) -> Result<Box<dyn RecordBatchReader + Send + 'static>>;
}
366
367/// A handle to an ADBC statement.
368///
369/// A statement is a container for all state needed to execute a database query,
370/// such as the query itself, parameters for prepared statements, driver
371/// parameters, etc.
372///
373/// Statements may represent queries or prepared statements.
374///
375/// Statements may be used multiple times and can be reconfigured
376/// (e.g. they can be reused to execute multiple different queries).
377/// However, executing a statement (and changing certain other state)
378/// will invalidate result sets obtained prior to that execution.
379///
380/// Multiple statements may be created from a single connection.
381/// However, the driver may block or error if they are used concurrently
382/// (whether from a single thread or multiple threads).
pub trait Statement: Optionable<Option = OptionStatement> {
    /// Bind Arrow data to this statement. This can be used for bulk inserts
    /// or prepared statements.
    fn bind(&mut self, batch: RecordBatch) -> Result<()>;

    /// Bind an Arrow data stream to this statement. This can be used for bulk
    /// inserts or prepared statements.
    // TODO(alexandreyc): should we use a generic here instead of a trait object?
    // See: https://github.com/apache/arrow-adbc/pull/1725#discussion_r1567750972
    fn bind_stream(&mut self, reader: Box<dyn RecordBatchReader + Send>) -> Result<()>;

    /// Execute a statement and get the results.
    ///
    /// This invalidates any prior result sets.
    fn execute(&mut self) -> Result<Box<dyn RecordBatchReader + Send + 'static>>;

    /// Execute a statement that doesn't have a result set and get the number
    /// of affected rows.
    ///
    /// This invalidates any prior result sets.
    ///
    /// # Result
    ///
    /// Will return the number of rows affected. If the affected row count is
    /// unknown or unsupported by the database, will return `None`.
    fn execute_update(&mut self) -> Result<Option<i64>>;

    /// Get the schema of the result set of a query without executing it.
    ///
    /// This invalidates any prior result sets.
    ///
    /// Depending on the driver, this may require first executing
    /// [Statement::prepare].
    ///
    /// # Since
    ///
    /// ADBC API revision 1.1.0
    fn execute_schema(&mut self) -> Result<Schema>;

    /// Execute a statement and get the results as a partitioned result set.
    fn execute_partitions(&mut self) -> Result<PartitionedResult>;

    /// Get the schema for bound parameters.
    ///
    /// This retrieves an Arrow schema describing the number, names, and
    /// types of the parameters in a parameterized statement. The fields
    /// of the schema should be in order of the ordinal position of the
    /// parameters; named parameters should appear only once.
    ///
    /// If the parameter does not have a name, or the name cannot be
    /// determined, the name of the corresponding field in the schema will
    /// be an empty string. If the type cannot be determined, the type of
    /// the corresponding field will be NA (NullType).
    ///
    /// This should be called after [Statement::prepare].
    fn get_parameter_schema(&self) -> Result<Schema>;

    /// Turn this statement into a prepared statement to be executed multiple
    /// times.
    ///
    /// This invalidates any prior result sets.
    fn prepare(&mut self) -> Result<()>;

    /// Set the SQL query to execute.
    ///
    /// The query can then be executed with [Statement::execute]. For queries
    /// expected to be executed repeatedly, call [Statement::prepare] first.
    fn set_sql_query(&mut self, query: impl AsRef<str>) -> Result<()>;

    /// Set the Substrait plan to execute.
    ///
    /// The query can then be executed with [Statement::execute]. For queries
    /// expected to be executed repeatedly, call [Statement::prepare] first.
    fn set_substrait_plan(&mut self, plan: impl AsRef<[u8]>) -> Result<()>;

    /// Cancel execution of an in-progress query.
    ///
    /// This can be called during [Statement::execute] (or similar), or while
    /// consuming a result set returned from such.
    ///
    /// # Since
    ///
    /// ADBC API revision 1.1.0
    fn cancel(&mut self) -> Result<()>;
}
467}