1use datafusion::arrow::array::Array;
19use datafusion::arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
20use datafusion::common::{DataFusionError, ScalarValue};
21use datafusion::logical_expr::sqlparser::ast::NullTreatment as DFNullTreatment;
22use pyo3::{exceptions::PyValueError, prelude::*};
23
24use crate::errors::py_datafusion_err;
25
/// Newtype wrapper around DataFusion's [`ScalarValue`].
///
/// The wrapped value is stored in the public tuple field `0`; conversions in
/// both directions are provided through `From`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
pub struct PyScalarValue(pub ScalarValue);
28
29impl From<ScalarValue> for PyScalarValue {
30 fn from(value: ScalarValue) -> Self {
31 Self(value)
32 }
33}
34impl From<PyScalarValue> for ScalarValue {
35 fn from(value: PyScalarValue) -> Self {
36 value.0
37 }
38}
39
/// Expression-kind enum exposed to Python as `datafusion.common.RexType`.
///
/// The `eq, eq_int` pyclass options make the Python class comparable both to
/// other `RexType` values and to plain integers, so the declaration order of
/// the variants is observable from Python.
// NOTE(review): "Rex" presumably stands for "row expression" -- confirm.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[pyclass(eq, eq_int, name = "RexType", module = "datafusion.common")]
pub enum RexType {
    Alias,
    Literal,
    Call,
    Reference,
    ScalarSubquery,
    Other,
}
50
/// Groups three views of one data type: its Arrow type, its Python type and
/// its SQL type.
///
/// Exposed to Python as `datafusion.common.DataTypeMap`; the `subclass`
/// option allows Python code to derive from it. Every field has a generated
/// Python getter and setter (`#[pyo3(get, set)]`).
#[derive(Debug, Clone)]
#[pyclass(name = "DataTypeMap", module = "datafusion.common", subclass)]
pub struct DataTypeMap {
    /// Arrow representation of the type.
    #[pyo3(get, set)]
    pub arrow_type: PyDataType,
    /// Python representation of the type.
    #[pyo3(get, set)]
    pub python_type: PythonType,
    /// SQL representation of the type.
    #[pyo3(get, set)]
    pub sql_type: SqlType,
}
70
71impl DataTypeMap {
72 fn new(arrow_type: DataType, python_type: PythonType, sql_type: SqlType) -> Self {
73 DataTypeMap {
74 arrow_type: PyDataType {
75 data_type: arrow_type,
76 },
77 python_type,
78 sql_type,
79 }
80 }
81
82 pub fn map_from_arrow_type(arrow_type: &DataType) -> Result<DataTypeMap, PyErr> {
83 match arrow_type {
84 DataType::Null => Ok(DataTypeMap::new(
85 DataType::Null,
86 PythonType::None,
87 SqlType::NULL,
88 )),
89 DataType::Boolean => Ok(DataTypeMap::new(
90 DataType::Boolean,
91 PythonType::Bool,
92 SqlType::BOOLEAN,
93 )),
94 DataType::Int8 => Ok(DataTypeMap::new(
95 DataType::Int8,
96 PythonType::Int,
97 SqlType::TINYINT,
98 )),
99 DataType::Int16 => Ok(DataTypeMap::new(
100 DataType::Int16,
101 PythonType::Int,
102 SqlType::SMALLINT,
103 )),
104 DataType::Int32 => Ok(DataTypeMap::new(
105 DataType::Int32,
106 PythonType::Int,
107 SqlType::INTEGER,
108 )),
109 DataType::Int64 => Ok(DataTypeMap::new(
110 DataType::Int64,
111 PythonType::Int,
112 SqlType::BIGINT,
113 )),
114 DataType::UInt8 => Ok(DataTypeMap::new(
115 DataType::UInt8,
116 PythonType::Int,
117 SqlType::TINYINT,
118 )),
119 DataType::UInt16 => Ok(DataTypeMap::new(
120 DataType::UInt16,
121 PythonType::Int,
122 SqlType::SMALLINT,
123 )),
124 DataType::UInt32 => Ok(DataTypeMap::new(
125 DataType::UInt32,
126 PythonType::Int,
127 SqlType::INTEGER,
128 )),
129 DataType::UInt64 => Ok(DataTypeMap::new(
130 DataType::UInt64,
131 PythonType::Int,
132 SqlType::BIGINT,
133 )),
134 DataType::Float16 => Ok(DataTypeMap::new(
135 DataType::Float16,
136 PythonType::Float,
137 SqlType::FLOAT,
138 )),
139 DataType::Float32 => Ok(DataTypeMap::new(
140 DataType::Float32,
141 PythonType::Float,
142 SqlType::FLOAT,
143 )),
144 DataType::Float64 => Ok(DataTypeMap::new(
145 DataType::Float64,
146 PythonType::Float,
147 SqlType::FLOAT,
148 )),
149 DataType::Timestamp(unit, tz) => Ok(DataTypeMap::new(
150 DataType::Timestamp(*unit, tz.clone()),
151 PythonType::Datetime,
152 SqlType::DATE,
153 )),
154 DataType::Date32 => Ok(DataTypeMap::new(
155 DataType::Date32,
156 PythonType::Datetime,
157 SqlType::DATE,
158 )),
159 DataType::Date64 => Ok(DataTypeMap::new(
160 DataType::Date64,
161 PythonType::Datetime,
162 SqlType::DATE,
163 )),
164 DataType::Time32(unit) => Ok(DataTypeMap::new(
165 DataType::Time32(*unit),
166 PythonType::Datetime,
167 SqlType::DATE,
168 )),
169 DataType::Time64(unit) => Ok(DataTypeMap::new(
170 DataType::Time64(*unit),
171 PythonType::Datetime,
172 SqlType::DATE,
173 )),
174 DataType::Duration(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(
175 format!("{arrow_type:?}"),
176 ))),
177 DataType::Interval(interval_unit) => Ok(DataTypeMap::new(
178 DataType::Interval(*interval_unit),
179 PythonType::Datetime,
180 match interval_unit {
181 IntervalUnit::DayTime => SqlType::INTERVAL_DAY,
182 IntervalUnit::MonthDayNano => SqlType::INTERVAL_MONTH,
183 IntervalUnit::YearMonth => SqlType::INTERVAL_YEAR_MONTH,
184 },
185 )),
186 DataType::Binary => Ok(DataTypeMap::new(
187 DataType::Binary,
188 PythonType::Bytes,
189 SqlType::BINARY,
190 )),
191 DataType::FixedSizeBinary(_) => Err(py_datafusion_err(
192 DataFusionError::NotImplemented(format!("{arrow_type:?}")),
193 )),
194 DataType::LargeBinary => Ok(DataTypeMap::new(
195 DataType::LargeBinary,
196 PythonType::Bytes,
197 SqlType::BINARY,
198 )),
199 DataType::Utf8 => Ok(DataTypeMap::new(
200 DataType::Utf8,
201 PythonType::Str,
202 SqlType::VARCHAR,
203 )),
204 DataType::LargeUtf8 => Ok(DataTypeMap::new(
205 DataType::LargeUtf8,
206 PythonType::Str,
207 SqlType::VARCHAR,
208 )),
209 DataType::List(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
210 "{arrow_type:?}"
211 )))),
212 DataType::FixedSizeList(_, _) => Err(py_datafusion_err(
213 DataFusionError::NotImplemented(format!("{arrow_type:?}")),
214 )),
215 DataType::LargeList(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(
216 format!("{arrow_type:?}"),
217 ))),
218 DataType::Struct(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(
219 format!("{arrow_type:?}"),
220 ))),
221 DataType::Union(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented(
222 format!("{arrow_type:?}"),
223 ))),
224 DataType::Dictionary(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented(
225 format!("{arrow_type:?}"),
226 ))),
227 DataType::Decimal128(precision, scale) => Ok(DataTypeMap::new(
228 DataType::Decimal128(*precision, *scale),
229 PythonType::Float,
230 SqlType::DECIMAL,
231 )),
232 DataType::Decimal256(precision, scale) => Ok(DataTypeMap::new(
233 DataType::Decimal256(*precision, *scale),
234 PythonType::Float,
235 SqlType::DECIMAL,
236 )),
237 DataType::Map(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented(
238 format!("{arrow_type:?}"),
239 ))),
240 DataType::RunEndEncoded(_, _) => Err(py_datafusion_err(
241 DataFusionError::NotImplemented(format!("{arrow_type:?}")),
242 )),
243 DataType::BinaryView => Err(py_datafusion_err(DataFusionError::NotImplemented(
244 format!("{arrow_type:?}"),
245 ))),
246 DataType::Utf8View => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
247 "{arrow_type:?}"
248 )))),
249 DataType::ListView(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(
250 format!("{arrow_type:?}"),
251 ))),
252 DataType::LargeListView(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(
253 format!("{arrow_type:?}"),
254 ))),
255 }
256 }
257
258 pub fn map_from_scalar_value(scalar_val: &ScalarValue) -> Result<DataTypeMap, PyErr> {
260 DataTypeMap::map_from_arrow_type(&DataTypeMap::map_from_scalar_to_arrow(scalar_val)?)
261 }
262
263 pub fn map_from_scalar_to_arrow(scalar_val: &ScalarValue) -> Result<DataType, PyErr> {
265 match scalar_val {
266 ScalarValue::Boolean(_) => Ok(DataType::Boolean),
267 ScalarValue::Float16(_) => Ok(DataType::Float16),
268 ScalarValue::Float32(_) => Ok(DataType::Float32),
269 ScalarValue::Float64(_) => Ok(DataType::Float64),
270 ScalarValue::Decimal128(_, precision, scale) => {
271 Ok(DataType::Decimal128(*precision, *scale))
272 }
273 ScalarValue::Decimal256(_, precision, scale) => {
274 Ok(DataType::Decimal256(*precision, *scale))
275 }
276 ScalarValue::Dictionary(data_type, scalar_type) => {
277 Ok(DataType::Dictionary(
279 Box::new(*data_type.clone()),
280 Box::new(DataTypeMap::map_from_scalar_to_arrow(scalar_type)?),
281 ))
282 }
283 ScalarValue::Int8(_) => Ok(DataType::Int8),
284 ScalarValue::Int16(_) => Ok(DataType::Int16),
285 ScalarValue::Int32(_) => Ok(DataType::Int32),
286 ScalarValue::Int64(_) => Ok(DataType::Int64),
287 ScalarValue::UInt8(_) => Ok(DataType::UInt8),
288 ScalarValue::UInt16(_) => Ok(DataType::UInt16),
289 ScalarValue::UInt32(_) => Ok(DataType::UInt32),
290 ScalarValue::UInt64(_) => Ok(DataType::UInt64),
291 ScalarValue::Utf8(_) => Ok(DataType::Utf8),
292 ScalarValue::LargeUtf8(_) => Ok(DataType::LargeUtf8),
293 ScalarValue::Binary(_) => Ok(DataType::Binary),
294 ScalarValue::LargeBinary(_) => Ok(DataType::LargeBinary),
295 ScalarValue::Date32(_) => Ok(DataType::Date32),
296 ScalarValue::Date64(_) => Ok(DataType::Date64),
297 ScalarValue::Time32Second(_) => Ok(DataType::Time32(TimeUnit::Second)),
298 ScalarValue::Time32Millisecond(_) => Ok(DataType::Time32(TimeUnit::Millisecond)),
299 ScalarValue::Time64Microsecond(_) => Ok(DataType::Time64(TimeUnit::Microsecond)),
300 ScalarValue::Time64Nanosecond(_) => Ok(DataType::Time64(TimeUnit::Nanosecond)),
301 ScalarValue::Null => Ok(DataType::Null),
302 ScalarValue::TimestampSecond(_, tz) => {
303 Ok(DataType::Timestamp(TimeUnit::Second, tz.to_owned()))
304 }
305 ScalarValue::TimestampMillisecond(_, tz) => {
306 Ok(DataType::Timestamp(TimeUnit::Millisecond, tz.to_owned()))
307 }
308 ScalarValue::TimestampMicrosecond(_, tz) => {
309 Ok(DataType::Timestamp(TimeUnit::Microsecond, tz.to_owned()))
310 }
311 ScalarValue::TimestampNanosecond(_, tz) => {
312 Ok(DataType::Timestamp(TimeUnit::Nanosecond, tz.to_owned()))
313 }
314 ScalarValue::IntervalYearMonth(..) => Ok(DataType::Interval(IntervalUnit::YearMonth)),
315 ScalarValue::IntervalDayTime(..) => Ok(DataType::Interval(IntervalUnit::DayTime)),
316 ScalarValue::IntervalMonthDayNano(..) => {
317 Ok(DataType::Interval(IntervalUnit::MonthDayNano))
318 }
319 ScalarValue::List(arr) => Ok(arr.data_type().to_owned()),
320 ScalarValue::Struct(_fields) => Err(py_datafusion_err(
321 DataFusionError::NotImplemented("ScalarValue::Struct".to_string()),
322 )),
323 ScalarValue::FixedSizeBinary(size, _) => Ok(DataType::FixedSizeBinary(*size)),
324 ScalarValue::FixedSizeList(_array_ref) => {
325 Err(py_datafusion_err(DataFusionError::NotImplemented(
329 "ScalarValue::FixedSizeList".to_string(),
330 )))
331 }
332 ScalarValue::LargeList(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(
333 "ScalarValue::LargeList".to_string(),
334 ))),
335 ScalarValue::DurationSecond(_) => Ok(DataType::Duration(TimeUnit::Second)),
336 ScalarValue::DurationMillisecond(_) => Ok(DataType::Duration(TimeUnit::Millisecond)),
337 ScalarValue::DurationMicrosecond(_) => Ok(DataType::Duration(TimeUnit::Microsecond)),
338 ScalarValue::DurationNanosecond(_) => Ok(DataType::Duration(TimeUnit::Nanosecond)),
339 ScalarValue::Union(_, _, _) => Err(py_datafusion_err(DataFusionError::NotImplemented(
340 "ScalarValue::LargeList".to_string(),
341 ))),
342 ScalarValue::Utf8View(_) => Ok(DataType::Utf8View),
343 ScalarValue::BinaryView(_) => Ok(DataType::BinaryView),
344 ScalarValue::Map(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(
345 "ScalarValue::Map".to_string(),
346 ))),
347 }
348 }
349}
350
351#[pymethods]
352impl DataTypeMap {
353 #[new]
354 pub fn py_new(arrow_type: PyDataType, python_type: PythonType, sql_type: SqlType) -> Self {
355 DataTypeMap {
356 arrow_type,
357 python_type,
358 sql_type,
359 }
360 }
361
362 #[staticmethod]
363 #[pyo3(name = "from_parquet_type_str")]
364 pub fn py_map_from_parquet_type_str(parquet_str_type: String) -> PyResult<DataTypeMap> {
368 let arrow_dtype = match parquet_str_type.to_lowercase().as_str() {
369 "boolean" => Ok(DataType::Boolean),
370 "int32" => Ok(DataType::Int32),
371 "int64" => Ok(DataType::Int64),
372 "int96" => {
373 Ok(DataType::Timestamp(TimeUnit::Nanosecond, None))
375 }
376 "float" => Ok(DataType::Float32),
377 "double" => Ok(DataType::Float64),
378 "byte_array" => Ok(DataType::Utf8),
379 _ => Err(PyValueError::new_err(format!(
380 "Unable to determine Arrow Data Type from Parquet String type: {parquet_str_type:?}"
381 ))),
382 };
383 DataTypeMap::map_from_arrow_type(&arrow_dtype?)
384 }
385
386 #[staticmethod]
387 #[pyo3(name = "arrow")]
388 pub fn py_map_from_arrow_type(arrow_type: &PyDataType) -> PyResult<DataTypeMap> {
389 DataTypeMap::map_from_arrow_type(&arrow_type.data_type)
390 }
391
392 #[staticmethod]
393 #[pyo3(name = "arrow_str")]
394 pub fn py_map_from_arrow_type_str(arrow_type_str: String) -> PyResult<DataTypeMap> {
395 let data_type = PyDataType::py_map_from_arrow_type_str(arrow_type_str);
396 DataTypeMap::map_from_arrow_type(&data_type?.data_type)
397 }
398
399 #[staticmethod]
400 #[pyo3(name = "sql")]
401 pub fn py_map_from_sql_type(sql_type: &SqlType) -> PyResult<DataTypeMap> {
402 match sql_type {
403 SqlType::ANY => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
404 "{sql_type:?}"
405 )))),
406 SqlType::ARRAY => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
407 "{sql_type:?}"
408 )))),
409 SqlType::BIGINT => Ok(DataTypeMap::new(
410 DataType::Int64,
411 PythonType::Int,
412 SqlType::BIGINT,
413 )),
414 SqlType::BINARY => Ok(DataTypeMap::new(
415 DataType::Binary,
416 PythonType::Bytes,
417 SqlType::BINARY,
418 )),
419 SqlType::BOOLEAN => Ok(DataTypeMap::new(
420 DataType::Boolean,
421 PythonType::Bool,
422 SqlType::BOOLEAN,
423 )),
424 SqlType::CHAR => Ok(DataTypeMap::new(
425 DataType::UInt8,
426 PythonType::Int,
427 SqlType::CHAR,
428 )),
429 SqlType::COLUMN_LIST => Err(py_datafusion_err(DataFusionError::NotImplemented(
430 format!("{sql_type:?}"),
431 ))),
432 SqlType::CURSOR => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
433 "{sql_type:?}"
434 )))),
435 SqlType::DATE => Ok(DataTypeMap::new(
436 DataType::Date64,
437 PythonType::Datetime,
438 SqlType::DATE,
439 )),
440 SqlType::DECIMAL => Ok(DataTypeMap::new(
441 DataType::Decimal128(1, 1),
442 PythonType::Float,
443 SqlType::DECIMAL,
444 )),
445 SqlType::DISTINCT => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
446 "{sql_type:?}"
447 )))),
448 SqlType::DOUBLE => Ok(DataTypeMap::new(
449 DataType::Decimal256(1, 1),
450 PythonType::Float,
451 SqlType::DOUBLE,
452 )),
453 SqlType::DYNAMIC_STAR => Err(py_datafusion_err(DataFusionError::NotImplemented(
454 format!("{sql_type:?}"),
455 ))),
456 SqlType::FLOAT => Ok(DataTypeMap::new(
457 DataType::Decimal128(1, 1),
458 PythonType::Float,
459 SqlType::FLOAT,
460 )),
461 SqlType::GEOMETRY => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
462 "{sql_type:?}"
463 )))),
464 SqlType::INTEGER => Ok(DataTypeMap::new(
465 DataType::Int8,
466 PythonType::Int,
467 SqlType::INTEGER,
468 )),
469 SqlType::INTERVAL => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
470 "{sql_type:?}"
471 )))),
472 SqlType::INTERVAL_DAY => Err(py_datafusion_err(DataFusionError::NotImplemented(
473 format!("{sql_type:?}"),
474 ))),
475 SqlType::INTERVAL_DAY_HOUR => Err(py_datafusion_err(DataFusionError::NotImplemented(
476 format!("{sql_type:?}"),
477 ))),
478 SqlType::INTERVAL_DAY_MINUTE => Err(py_datafusion_err(
479 DataFusionError::NotImplemented(format!("{sql_type:?}")),
480 )),
481 SqlType::INTERVAL_DAY_SECOND => Err(py_datafusion_err(
482 DataFusionError::NotImplemented(format!("{sql_type:?}")),
483 )),
484 SqlType::INTERVAL_HOUR => Err(py_datafusion_err(DataFusionError::NotImplemented(
485 format!("{sql_type:?}"),
486 ))),
487 SqlType::INTERVAL_HOUR_MINUTE => Err(py_datafusion_err(
488 DataFusionError::NotImplemented(format!("{sql_type:?}")),
489 )),
490 SqlType::INTERVAL_HOUR_SECOND => Err(py_datafusion_err(
491 DataFusionError::NotImplemented(format!("{sql_type:?}")),
492 )),
493 SqlType::INTERVAL_MINUTE => Err(py_datafusion_err(DataFusionError::NotImplemented(
494 format!("{sql_type:?}"),
495 ))),
496 SqlType::INTERVAL_MINUTE_SECOND => Err(py_datafusion_err(
497 DataFusionError::NotImplemented(format!("{sql_type:?}")),
498 )),
499 SqlType::INTERVAL_MONTH => Err(py_datafusion_err(DataFusionError::NotImplemented(
500 format!("{sql_type:?}"),
501 ))),
502 SqlType::INTERVAL_SECOND => Err(py_datafusion_err(DataFusionError::NotImplemented(
503 format!("{sql_type:?}"),
504 ))),
505 SqlType::INTERVAL_YEAR => Err(py_datafusion_err(DataFusionError::NotImplemented(
506 format!("{sql_type:?}"),
507 ))),
508 SqlType::INTERVAL_YEAR_MONTH => Err(py_datafusion_err(
509 DataFusionError::NotImplemented(format!("{sql_type:?}")),
510 )),
511 SqlType::MAP => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
512 "{sql_type:?}"
513 )))),
514 SqlType::MULTISET => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
515 "{sql_type:?}"
516 )))),
517 SqlType::NULL => Ok(DataTypeMap::new(
518 DataType::Null,
519 PythonType::None,
520 SqlType::NULL,
521 )),
522 SqlType::OTHER => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
523 "{sql_type:?}"
524 )))),
525 SqlType::REAL => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
526 "{sql_type:?}"
527 )))),
528 SqlType::ROW => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
529 "{sql_type:?}"
530 )))),
531 SqlType::SARG => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
532 "{sql_type:?}"
533 )))),
534 SqlType::SMALLINT => Ok(DataTypeMap::new(
535 DataType::Int16,
536 PythonType::Int,
537 SqlType::SMALLINT,
538 )),
539 SqlType::STRUCTURED => Err(py_datafusion_err(DataFusionError::NotImplemented(
540 format!("{sql_type:?}"),
541 ))),
542 SqlType::SYMBOL => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
543 "{sql_type:?}"
544 )))),
545 SqlType::TIME => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
546 "{sql_type:?}"
547 )))),
548 SqlType::TIME_WITH_LOCAL_TIME_ZONE => Err(py_datafusion_err(
549 DataFusionError::NotImplemented(format!("{sql_type:?}")),
550 )),
551 SqlType::TIMESTAMP => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
552 "{sql_type:?}"
553 )))),
554 SqlType::TIMESTAMP_WITH_LOCAL_TIME_ZONE => Err(py_datafusion_err(
555 DataFusionError::NotImplemented(format!("{sql_type:?}")),
556 )),
557 SqlType::TINYINT => Ok(DataTypeMap::new(
558 DataType::Int8,
559 PythonType::Int,
560 SqlType::TINYINT,
561 )),
562 SqlType::UNKNOWN => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
563 "{sql_type:?}"
564 )))),
565 SqlType::VARBINARY => Ok(DataTypeMap::new(
566 DataType::LargeBinary,
567 PythonType::Bytes,
568 SqlType::VARBINARY,
569 )),
570 SqlType::VARCHAR => Ok(DataTypeMap::new(
571 DataType::Utf8,
572 PythonType::Str,
573 SqlType::VARCHAR,
574 )),
575 }
576 }
577
578 #[pyo3(name = "friendly_arrow_type_name")]
582 pub fn friendly_arrow_type_name(&self) -> PyResult<&str> {
583 Ok(match &self.arrow_type.data_type {
584 DataType::Null => "Null",
585 DataType::Boolean => "Boolean",
586 DataType::Int8 => "Int8",
587 DataType::Int16 => "Int16",
588 DataType::Int32 => "Int32",
589 DataType::Int64 => "Int64",
590 DataType::UInt8 => "UInt8",
591 DataType::UInt16 => "UInt16",
592 DataType::UInt32 => "UInt32",
593 DataType::UInt64 => "UInt64",
594 DataType::Float16 => "Float16",
595 DataType::Float32 => "Float32",
596 DataType::Float64 => "Float64",
597 DataType::Timestamp(_, _) => "Timestamp",
598 DataType::Date32 => "Date32",
599 DataType::Date64 => "Date64",
600 DataType::Time32(_) => "Time32",
601 DataType::Time64(_) => "Time64",
602 DataType::Duration(_) => "Duration",
603 DataType::Interval(_) => "Interval",
604 DataType::Binary => "Binary",
605 DataType::FixedSizeBinary(_) => "FixedSizeBinary",
606 DataType::LargeBinary => "LargeBinary",
607 DataType::Utf8 => "Utf8",
608 DataType::LargeUtf8 => "LargeUtf8",
609 DataType::List(_) => "List",
610 DataType::FixedSizeList(_, _) => "FixedSizeList",
611 DataType::LargeList(_) => "LargeList",
612 DataType::Struct(_) => "Struct",
613 DataType::Union(_, _) => "Union",
614 DataType::Dictionary(_, _) => "Dictionary",
615 DataType::Decimal128(_, _) => "Decimal128",
616 DataType::Decimal256(_, _) => "Decimal256",
617 DataType::Map(_, _) => "Map",
618 DataType::RunEndEncoded(_, _) => "RunEndEncoded",
619 DataType::BinaryView => "BinaryView",
620 DataType::Utf8View => "Utf8View",
621 DataType::ListView(_) => "ListView",
622 DataType::LargeListView(_) => "LargeListView",
623 })
624 }
625}
626
/// Wrapper around an Arrow [`DataType`], exposed to Python as
/// `datafusion.common.DataType`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[pyclass(name = "DataType", module = "datafusion.common")]
pub struct PyDataType {
    /// The wrapped Arrow type.
    pub data_type: DataType,
}
635
636impl PyDataType {
637 pub fn py_map_from_arrow_type_str(arrow_str_type: String) -> PyResult<PyDataType> {
641 let arrow_str_type = match arrow_str_type.find('[') {
643 Some(index) => arrow_str_type[0..index].to_string(),
644 None => arrow_str_type, };
646
647 let arrow_dtype = match arrow_str_type.to_lowercase().as_str() {
648 "bool" => Ok(DataType::Boolean),
649 "boolean" => Ok(DataType::Boolean),
650 "uint8" => Ok(DataType::UInt8),
651 "uint16" => Ok(DataType::UInt16),
652 "uint32" => Ok(DataType::UInt32),
653 "uint64" => Ok(DataType::UInt64),
654 "int8" => Ok(DataType::Int8),
655 "int16" => Ok(DataType::Int16),
656 "int32" => Ok(DataType::Int32),
657 "int64" => Ok(DataType::Int64),
658 "float" => Ok(DataType::Float32),
659 "double" => Ok(DataType::Float64),
660 "float16" => Ok(DataType::Float16),
661 "float32" => Ok(DataType::Float32),
662 "float64" => Ok(DataType::Float64),
663 "datetime64" => Ok(DataType::Date64),
664 "object" => Ok(DataType::Utf8),
665 _ => Err(PyValueError::new_err(format!(
666 "Unable to determine Arrow Data Type from Arrow String type: {arrow_str_type:?}"
667 ))),
668 };
669 Ok(PyDataType {
670 data_type: arrow_dtype?,
671 })
672 }
673}
674
675impl From<PyDataType> for DataType {
676 fn from(data_type: PyDataType) -> DataType {
677 data_type.data_type
678 }
679}
680
681impl From<DataType> for PyDataType {
682 fn from(data_type: DataType) -> PyDataType {
683 PyDataType { data_type }
684 }
685}
686
/// Python-side type categories, exposed to Python as
/// `datafusion.common.PythonType`.
///
/// The `eq, eq_int` pyclass options make values comparable to each other and
/// to plain integers from Python, so the declaration order of the variants is
/// observable there.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[pyclass(eq, eq_int, name = "PythonType", module = "datafusion.common")]
pub enum PythonType {
    Array,
    Bool,
    Bytes,
    Datetime,
    Float,
    Int,
    List,
    None,
    Object,
    Str,
}
702
/// SQL type names, exposed to Python as `datafusion.common.SqlType`.
///
/// Variants are spelled in upper case with underscores, which is why the
/// naming lints are silenced below. The `eq, eq_int` pyclass options make
/// values comparable to each other and to plain integers from Python, so the
/// declaration order of the variants is observable there.
// The variant names deliberately do not follow Rust's UpperCamelCase convention.
#[allow(non_camel_case_types)]
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[pyclass(eq, eq_int, name = "SqlType", module = "datafusion.common")]
pub enum SqlType {
    ANY,
    ARRAY,
    BIGINT,
    BINARY,
    BOOLEAN,
    CHAR,
    COLUMN_LIST,
    CURSOR,
    DATE,
    DECIMAL,
    DISTINCT,
    DOUBLE,
    DYNAMIC_STAR,
    FLOAT,
    GEOMETRY,
    INTEGER,
    INTERVAL,
    INTERVAL_DAY,
    INTERVAL_DAY_HOUR,
    INTERVAL_DAY_MINUTE,
    INTERVAL_DAY_SECOND,
    INTERVAL_HOUR,
    INTERVAL_HOUR_MINUTE,
    INTERVAL_HOUR_SECOND,
    INTERVAL_MINUTE,
    INTERVAL_MINUTE_SECOND,
    INTERVAL_MONTH,
    INTERVAL_SECOND,
    INTERVAL_YEAR,
    INTERVAL_YEAR_MONTH,
    MAP,
    MULTISET,
    NULL,
    OTHER,
    REAL,
    ROW,
    SARG,
    SMALLINT,
    STRUCTURED,
    SYMBOL,
    TIME,
    TIME_WITH_LOCAL_TIME_ZONE,
    TIMESTAMP,
    TIMESTAMP_WITH_LOCAL_TIME_ZONE,
    TINYINT,
    UNKNOWN,
    VARBINARY,
    VARCHAR,
}
760
/// Null-handling option exposed to Python as
/// `datafusion.common.NullTreatment`; it converts to and from the SQL
/// parser's `NullTreatment` (imported here as `DFNullTreatment`).
// The variant names deliberately do not follow Rust's UpperCamelCase convention.
#[allow(non_camel_case_types)]
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[pyclass(eq, eq_int, name = "NullTreatment", module = "datafusion.common")]
pub enum NullTreatment {
    IGNORE_NULLS,
    RESPECT_NULLS,
}
772
773impl From<NullTreatment> for DFNullTreatment {
774 fn from(null_treatment: NullTreatment) -> DFNullTreatment {
775 match null_treatment {
776 NullTreatment::IGNORE_NULLS => DFNullTreatment::IgnoreNulls,
777 NullTreatment::RESPECT_NULLS => DFNullTreatment::RespectNulls,
778 }
779 }
780}
781
782impl From<DFNullTreatment> for NullTreatment {
783 fn from(null_treatment: DFNullTreatment) -> NullTreatment {
784 match null_treatment {
785 DFNullTreatment::IgnoreNulls => NullTreatment::IGNORE_NULLS,
786 DFNullTreatment::RespectNulls => NullTreatment::RESPECT_NULLS,
787 }
788 }
789}