arrow_odbc/
schema.rs

1use arrow::datatypes::{DataType as ArrowDataType, Field, Schema, TimeUnit};
2use log::debug;
3use odbc_api::{ColumnDescription, DataType as OdbcDataType, ResultSetMetadata};
4use std::convert::TryInto;
5
6use crate::{ColumnFailure, Error};
7
8/// Query the metadata to create an arrow schema. This method is invoked automatically for you by
9/// [`crate::OdbcReaderBuilder::build`]. You may want to call this method in situtation ther you want
10/// to create an arrow schema without creating the reader yet.
11///
12/// # Example
13///
14/// ```
15/// use anyhow::Error;
16///
17/// use arrow_odbc::{arrow_schema_from, arrow::datatypes::Schema, odbc_api::Connection};
18///
19/// fn fetch_schema_for_table(
20///     table_name: &str,
21///     connection: &Connection<'_>
22/// ) -> Result<Schema, Error> {
23///     // Query column with values to get a cursor
24///     let sql = format!("SELECT * FROM {}", table_name);
25///     let mut prepared = connection.prepare(&sql)?;
26///     
27///     // Now that we have prepared statement, we want to use it to query metadata.
28///     let map_errors_to_null = false;
29///     let schema = arrow_schema_from(&mut prepared, map_errors_to_null)?;
30///     Ok(schema)
31/// }
32/// ```
33pub fn arrow_schema_from(
34    resut_set_metadata: &mut impl ResultSetMetadata,
35    map_value_errors_to_null: bool,
36) -> Result<Schema, Error> {
37    let num_cols: u16 = resut_set_metadata
38        .num_result_cols()
39        .map_err(Error::UnableToRetrieveNumCols)?
40        .try_into()
41        .unwrap();
42    let mut fields = Vec::new();
43    for index in 0..num_cols {
44        let field = arrow_field_from(resut_set_metadata, index, map_value_errors_to_null)?;
45
46        fields.push(field)
47    }
48    Ok(Schema::new(fields))
49}
50
51fn arrow_field_from(
52    resut_set_metadata: &mut impl ResultSetMetadata,
53    index: u16,
54    map_value_errors_to_null: bool,
55) -> Result<Field, Error> {
56    let mut column_description = ColumnDescription::default();
57    resut_set_metadata
58        .describe_col(index + 1, &mut column_description)
59        .map_err(|cause| Error::ColumnFailure {
60            name: "Unknown".to_owned(),
61            index: index as usize,
62            source: ColumnFailure::FailedToDescribeColumn(cause),
63        })?;
64    let name = column_description
65        .name_to_string()
66        .map_err(|source| Error::EncodingInvalid { source })?;
67    debug!(
68        "ODBC driver reported for column {index}. Relational type: {:?}; Nullability: {:?}; \
69            Name: '{name}';",
70        column_description.data_type, column_description.nullability
71    );
72    let data_type = match column_description.data_type {
73        OdbcDataType::Numeric {
74            precision: p @ 0..=38,
75            scale,
76        }
77        | OdbcDataType::Decimal {
78            precision: p @ 0..=38,
79            scale,
80        } => ArrowDataType::Decimal128(p as u8, scale.try_into().unwrap()),
81        OdbcDataType::Integer => ArrowDataType::Int32,
82        OdbcDataType::SmallInt => ArrowDataType::Int16,
83        OdbcDataType::Real | OdbcDataType::Float { precision: 0..=24 } => ArrowDataType::Float32,
84        OdbcDataType::Float { precision: _ } | OdbcDataType::Double => ArrowDataType::Float64,
85        OdbcDataType::Date => ArrowDataType::Date32,
86        OdbcDataType::Timestamp { precision: 0 } => {
87            ArrowDataType::Timestamp(TimeUnit::Second, None)
88        }
89        OdbcDataType::Timestamp { precision: 1..=3 } => {
90            ArrowDataType::Timestamp(TimeUnit::Millisecond, None)
91        }
92        OdbcDataType::Timestamp { precision: 4..=6 } => {
93            ArrowDataType::Timestamp(TimeUnit::Microsecond, None)
94        }
95        OdbcDataType::Timestamp { precision: _ } => {
96            ArrowDataType::Timestamp(TimeUnit::Nanosecond, None)
97        }
98        OdbcDataType::BigInt => ArrowDataType::Int64,
99        OdbcDataType::TinyInt => {
100            let is_unsigned = resut_set_metadata
101                .column_is_unsigned(index + 1)
102                .map_err(|e| Error::ColumnFailure {
103                    name: name.clone(),
104                    index: index as usize,
105                    source: ColumnFailure::FailedToDescribeColumn(e),
106                })?;
107            if is_unsigned {
108                ArrowDataType::UInt8
109            } else {
110                ArrowDataType::Int8
111            }
112        }
113        OdbcDataType::Bit => ArrowDataType::Boolean,
114        OdbcDataType::Binary { length } => {
115            let length = length
116                .ok_or_else(|| Error::ColumnFailure {
117                    name: name.clone(),
118                    index: index as usize,
119                    source: ColumnFailure::ZeroSizedColumn {
120                        sql_type: OdbcDataType::Binary { length },
121                    },
122                })?
123                .get()
124                .try_into()
125                .unwrap();
126            ArrowDataType::FixedSizeBinary(length)
127        }
128        OdbcDataType::LongVarbinary { length: _ } | OdbcDataType::Varbinary { length: _ } => {
129            ArrowDataType::Binary
130        }
131        OdbcDataType::Unknown
132        | OdbcDataType::Time { precision: _ }
133        | OdbcDataType::Numeric { .. }
134        | OdbcDataType::Decimal { .. }
135        | OdbcDataType::Other {
136            data_type: _,
137            column_size: _,
138            decimal_digits: _,
139        }
140        | OdbcDataType::WChar { length: _ }
141        | OdbcDataType::Char { length: _ }
142        | OdbcDataType::WVarchar { length: _ }
143        | OdbcDataType::LongVarchar { length: _ }
144        | OdbcDataType::Varchar { length: _ } => ArrowDataType::Utf8,
145    };
146    let is_falliable = matches!(data_type, ArrowDataType::Timestamp(TimeUnit::Nanosecond, _));
147    let nullable =
148        column_description.could_be_nullable() || (is_falliable && map_value_errors_to_null);
149    let field = Field::new(name, data_type, nullable);
150    Ok(field)
151}