1use datafusion::arrow::array::Array;
19use datafusion::arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
20use datafusion::common::ScalarValue;
21use datafusion::logical_expr::expr::NullTreatment as DFNullTreatment;
22use pyo3::exceptions::{PyNotImplementedError, PyValueError};
23use pyo3::prelude::*;
24
25#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
26pub struct PyScalarValue(pub ScalarValue);
27
28impl From<ScalarValue> for PyScalarValue {
29 fn from(value: ScalarValue) -> Self {
30 Self(value)
31 }
32}
33impl From<PyScalarValue> for ScalarValue {
34 fn from(value: PyScalarValue) -> Self {
35 value.0
36 }
37}
38
39#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
40#[pyclass(frozen, eq, eq_int, name = "RexType", module = "datafusion.common")]
41pub enum RexType {
42 Alias,
43 Literal,
44 Call,
45 Reference,
46 ScalarSubquery,
47 Other,
48}
49
50#[derive(Debug, Clone)]
61#[pyclass(name = "DataTypeMap", module = "datafusion.common", subclass)]
62pub struct DataTypeMap {
63 #[pyo3(get, set)]
64 pub arrow_type: PyDataType,
65 #[pyo3(get, set)]
66 pub python_type: PythonType,
67 #[pyo3(get, set)]
68 pub sql_type: SqlType,
69}
70
71impl DataTypeMap {
72 fn new(arrow_type: DataType, python_type: PythonType, sql_type: SqlType) -> Self {
73 DataTypeMap {
74 arrow_type: PyDataType {
75 data_type: arrow_type,
76 },
77 python_type,
78 sql_type,
79 }
80 }
81
82 pub fn map_from_arrow_type(arrow_type: &DataType) -> Result<DataTypeMap, PyErr> {
83 match arrow_type {
84 DataType::Null => Ok(DataTypeMap::new(
85 DataType::Null,
86 PythonType::None,
87 SqlType::NULL,
88 )),
89 DataType::Boolean => Ok(DataTypeMap::new(
90 DataType::Boolean,
91 PythonType::Bool,
92 SqlType::BOOLEAN,
93 )),
94 DataType::Int8 => Ok(DataTypeMap::new(
95 DataType::Int8,
96 PythonType::Int,
97 SqlType::TINYINT,
98 )),
99 DataType::Int16 => Ok(DataTypeMap::new(
100 DataType::Int16,
101 PythonType::Int,
102 SqlType::SMALLINT,
103 )),
104 DataType::Int32 => Ok(DataTypeMap::new(
105 DataType::Int32,
106 PythonType::Int,
107 SqlType::INTEGER,
108 )),
109 DataType::Int64 => Ok(DataTypeMap::new(
110 DataType::Int64,
111 PythonType::Int,
112 SqlType::BIGINT,
113 )),
114 DataType::UInt8 => Ok(DataTypeMap::new(
115 DataType::UInt8,
116 PythonType::Int,
117 SqlType::TINYINT,
118 )),
119 DataType::UInt16 => Ok(DataTypeMap::new(
120 DataType::UInt16,
121 PythonType::Int,
122 SqlType::SMALLINT,
123 )),
124 DataType::UInt32 => Ok(DataTypeMap::new(
125 DataType::UInt32,
126 PythonType::Int,
127 SqlType::INTEGER,
128 )),
129 DataType::UInt64 => Ok(DataTypeMap::new(
130 DataType::UInt64,
131 PythonType::Int,
132 SqlType::BIGINT,
133 )),
134 DataType::Float16 => Ok(DataTypeMap::new(
135 DataType::Float16,
136 PythonType::Float,
137 SqlType::FLOAT,
138 )),
139 DataType::Float32 => Ok(DataTypeMap::new(
140 DataType::Float32,
141 PythonType::Float,
142 SqlType::FLOAT,
143 )),
144 DataType::Float64 => Ok(DataTypeMap::new(
145 DataType::Float64,
146 PythonType::Float,
147 SqlType::FLOAT,
148 )),
149 DataType::Timestamp(unit, tz) => Ok(DataTypeMap::new(
150 DataType::Timestamp(*unit, tz.clone()),
151 PythonType::Datetime,
152 SqlType::DATE,
153 )),
154 DataType::Date32 => Ok(DataTypeMap::new(
155 DataType::Date32,
156 PythonType::Datetime,
157 SqlType::DATE,
158 )),
159 DataType::Date64 => Ok(DataTypeMap::new(
160 DataType::Date64,
161 PythonType::Datetime,
162 SqlType::DATE,
163 )),
164 DataType::Time32(unit) => Ok(DataTypeMap::new(
165 DataType::Time32(*unit),
166 PythonType::Datetime,
167 SqlType::DATE,
168 )),
169 DataType::Time64(unit) => Ok(DataTypeMap::new(
170 DataType::Time64(*unit),
171 PythonType::Datetime,
172 SqlType::DATE,
173 )),
174 DataType::Duration(_) => Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))),
175 DataType::Interval(interval_unit) => Ok(DataTypeMap::new(
176 DataType::Interval(*interval_unit),
177 PythonType::Datetime,
178 match interval_unit {
179 IntervalUnit::DayTime => SqlType::INTERVAL_DAY,
180 IntervalUnit::MonthDayNano => SqlType::INTERVAL_MONTH,
181 IntervalUnit::YearMonth => SqlType::INTERVAL_YEAR_MONTH,
182 },
183 )),
184 DataType::Binary => Ok(DataTypeMap::new(
185 DataType::Binary,
186 PythonType::Bytes,
187 SqlType::BINARY,
188 )),
189 DataType::FixedSizeBinary(_) => {
190 Err(PyNotImplementedError::new_err(format!("{arrow_type:?}")))
191 }
192 DataType::LargeBinary => Ok(DataTypeMap::new(
193 DataType::LargeBinary,
194 PythonType::Bytes,
195 SqlType::BINARY,
196 )),
197 DataType::Utf8 => Ok(DataTypeMap::new(
198 DataType::Utf8,
199 PythonType::Str,
200 SqlType::VARCHAR,
201 )),
202 DataType::LargeUtf8 => Ok(DataTypeMap::new(
203 DataType::LargeUtf8,
204 PythonType::Str,
205 SqlType::VARCHAR,
206 )),
207 DataType::List(_) => Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))),
208 DataType::FixedSizeList(_, _) => {
209 Err(PyNotImplementedError::new_err(format!("{arrow_type:?}")))
210 }
211 DataType::LargeList(_) => {
212 Err(PyNotImplementedError::new_err(format!("{arrow_type:?}")))
213 }
214 DataType::Struct(_) => Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))),
215 DataType::Union(_, _) => Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))),
216 DataType::Dictionary(_, _) => {
217 Err(PyNotImplementedError::new_err(format!("{arrow_type:?}")))
218 }
219 DataType::Decimal32(precision, scale) => Ok(DataTypeMap::new(
220 DataType::Decimal32(*precision, *scale),
221 PythonType::Float,
222 SqlType::DECIMAL,
223 )),
224 DataType::Decimal64(precision, scale) => Ok(DataTypeMap::new(
225 DataType::Decimal64(*precision, *scale),
226 PythonType::Float,
227 SqlType::DECIMAL,
228 )),
229 DataType::Decimal128(precision, scale) => Ok(DataTypeMap::new(
230 DataType::Decimal128(*precision, *scale),
231 PythonType::Float,
232 SqlType::DECIMAL,
233 )),
234 DataType::Decimal256(precision, scale) => Ok(DataTypeMap::new(
235 DataType::Decimal256(*precision, *scale),
236 PythonType::Float,
237 SqlType::DECIMAL,
238 )),
239 DataType::Map(_, _) => Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))),
240 DataType::RunEndEncoded(_, _) => {
241 Err(PyNotImplementedError::new_err(format!("{arrow_type:?}")))
242 }
243 DataType::BinaryView => Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))),
244 DataType::Utf8View => Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))),
245 DataType::ListView(_) => Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))),
246 DataType::LargeListView(_) => {
247 Err(PyNotImplementedError::new_err(format!("{arrow_type:?}")))
248 }
249 }
250 }
251
252 pub fn map_from_scalar_value(scalar_val: &ScalarValue) -> Result<DataTypeMap, PyErr> {
254 DataTypeMap::map_from_arrow_type(&DataTypeMap::map_from_scalar_to_arrow(scalar_val)?)
255 }
256
257 pub fn map_from_scalar_to_arrow(scalar_val: &ScalarValue) -> Result<DataType, PyErr> {
259 match scalar_val {
260 ScalarValue::Boolean(_) => Ok(DataType::Boolean),
261 ScalarValue::Float16(_) => Ok(DataType::Float16),
262 ScalarValue::Float32(_) => Ok(DataType::Float32),
263 ScalarValue::Float64(_) => Ok(DataType::Float64),
264 ScalarValue::Decimal32(_, precision, scale) => {
265 Ok(DataType::Decimal32(*precision, *scale))
266 }
267 ScalarValue::Decimal64(_, precision, scale) => {
268 Ok(DataType::Decimal64(*precision, *scale))
269 }
270 ScalarValue::Decimal128(_, precision, scale) => {
271 Ok(DataType::Decimal128(*precision, *scale))
272 }
273 ScalarValue::Decimal256(_, precision, scale) => {
274 Ok(DataType::Decimal256(*precision, *scale))
275 }
276 ScalarValue::Dictionary(data_type, scalar_type) => {
277 Ok(DataType::Dictionary(
279 Box::new(*data_type.clone()),
280 Box::new(DataTypeMap::map_from_scalar_to_arrow(scalar_type)?),
281 ))
282 }
283 ScalarValue::Int8(_) => Ok(DataType::Int8),
284 ScalarValue::Int16(_) => Ok(DataType::Int16),
285 ScalarValue::Int32(_) => Ok(DataType::Int32),
286 ScalarValue::Int64(_) => Ok(DataType::Int64),
287 ScalarValue::UInt8(_) => Ok(DataType::UInt8),
288 ScalarValue::UInt16(_) => Ok(DataType::UInt16),
289 ScalarValue::UInt32(_) => Ok(DataType::UInt32),
290 ScalarValue::UInt64(_) => Ok(DataType::UInt64),
291 ScalarValue::Utf8(_) => Ok(DataType::Utf8),
292 ScalarValue::LargeUtf8(_) => Ok(DataType::LargeUtf8),
293 ScalarValue::Binary(_) => Ok(DataType::Binary),
294 ScalarValue::LargeBinary(_) => Ok(DataType::LargeBinary),
295 ScalarValue::Date32(_) => Ok(DataType::Date32),
296 ScalarValue::Date64(_) => Ok(DataType::Date64),
297 ScalarValue::Time32Second(_) => Ok(DataType::Time32(TimeUnit::Second)),
298 ScalarValue::Time32Millisecond(_) => Ok(DataType::Time32(TimeUnit::Millisecond)),
299 ScalarValue::Time64Microsecond(_) => Ok(DataType::Time64(TimeUnit::Microsecond)),
300 ScalarValue::Time64Nanosecond(_) => Ok(DataType::Time64(TimeUnit::Nanosecond)),
301 ScalarValue::Null => Ok(DataType::Null),
302 ScalarValue::TimestampSecond(_, tz) => {
303 Ok(DataType::Timestamp(TimeUnit::Second, tz.to_owned()))
304 }
305 ScalarValue::TimestampMillisecond(_, tz) => {
306 Ok(DataType::Timestamp(TimeUnit::Millisecond, tz.to_owned()))
307 }
308 ScalarValue::TimestampMicrosecond(_, tz) => {
309 Ok(DataType::Timestamp(TimeUnit::Microsecond, tz.to_owned()))
310 }
311 ScalarValue::TimestampNanosecond(_, tz) => {
312 Ok(DataType::Timestamp(TimeUnit::Nanosecond, tz.to_owned()))
313 }
314 ScalarValue::IntervalYearMonth(..) => Ok(DataType::Interval(IntervalUnit::YearMonth)),
315 ScalarValue::IntervalDayTime(..) => Ok(DataType::Interval(IntervalUnit::DayTime)),
316 ScalarValue::IntervalMonthDayNano(..) => {
317 Ok(DataType::Interval(IntervalUnit::MonthDayNano))
318 }
319 ScalarValue::List(arr) => Ok(arr.data_type().to_owned()),
320 ScalarValue::Struct(_fields) => Err(PyNotImplementedError::new_err(
321 "ScalarValue::Struct".to_string(),
322 )),
323 ScalarValue::FixedSizeBinary(size, _) => Ok(DataType::FixedSizeBinary(*size)),
324 ScalarValue::FixedSizeList(_array_ref) => {
325 Err(PyNotImplementedError::new_err(
329 "ScalarValue::FixedSizeList".to_string(),
330 ))
331 }
332 ScalarValue::LargeList(_) => Err(PyNotImplementedError::new_err(
333 "ScalarValue::LargeList".to_string(),
334 )),
335 ScalarValue::DurationSecond(_) => Ok(DataType::Duration(TimeUnit::Second)),
336 ScalarValue::DurationMillisecond(_) => Ok(DataType::Duration(TimeUnit::Millisecond)),
337 ScalarValue::DurationMicrosecond(_) => Ok(DataType::Duration(TimeUnit::Microsecond)),
338 ScalarValue::DurationNanosecond(_) => Ok(DataType::Duration(TimeUnit::Nanosecond)),
339 ScalarValue::Union(_, _, _) => Err(PyNotImplementedError::new_err(
340 "ScalarValue::LargeList".to_string(),
341 )),
342 ScalarValue::Utf8View(_) => Ok(DataType::Utf8View),
343 ScalarValue::BinaryView(_) => Ok(DataType::BinaryView),
344 ScalarValue::Map(_) => Err(PyNotImplementedError::new_err(
345 "ScalarValue::Map".to_string(),
346 )),
347 }
348 }
349}
350
351#[pymethods]
352impl DataTypeMap {
353 #[new]
354 pub fn py_new(arrow_type: PyDataType, python_type: PythonType, sql_type: SqlType) -> Self {
355 DataTypeMap {
356 arrow_type,
357 python_type,
358 sql_type,
359 }
360 }
361
362 #[staticmethod]
363 #[pyo3(name = "from_parquet_type_str")]
364 pub fn py_map_from_parquet_type_str(parquet_str_type: String) -> PyResult<DataTypeMap> {
368 let arrow_dtype = match parquet_str_type.to_lowercase().as_str() {
369 "boolean" => Ok(DataType::Boolean),
370 "int32" => Ok(DataType::Int32),
371 "int64" => Ok(DataType::Int64),
372 "int96" => {
373 Ok(DataType::Timestamp(TimeUnit::Nanosecond, None))
375 }
376 "float" => Ok(DataType::Float32),
377 "double" => Ok(DataType::Float64),
378 "byte_array" => Ok(DataType::Utf8),
379 _ => Err(PyValueError::new_err(format!(
380 "Unable to determine Arrow Data Type from Parquet String type: {parquet_str_type:?}"
381 ))),
382 };
383 DataTypeMap::map_from_arrow_type(&arrow_dtype?)
384 }
385
386 #[staticmethod]
387 #[pyo3(name = "arrow")]
388 pub fn py_map_from_arrow_type(arrow_type: &PyDataType) -> PyResult<DataTypeMap> {
389 DataTypeMap::map_from_arrow_type(&arrow_type.data_type)
390 }
391
392 #[staticmethod]
393 #[pyo3(name = "arrow_str")]
394 pub fn py_map_from_arrow_type_str(arrow_type_str: String) -> PyResult<DataTypeMap> {
395 let data_type = PyDataType::py_map_from_arrow_type_str(arrow_type_str);
396 DataTypeMap::map_from_arrow_type(&data_type?.data_type)
397 }
398
399 #[staticmethod]
400 #[pyo3(name = "sql")]
401 pub fn py_map_from_sql_type(sql_type: &SqlType) -> PyResult<DataTypeMap> {
402 match sql_type {
403 SqlType::ANY => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
404 SqlType::ARRAY => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
405 SqlType::BIGINT => Ok(DataTypeMap::new(
406 DataType::Int64,
407 PythonType::Int,
408 SqlType::BIGINT,
409 )),
410 SqlType::BINARY => Ok(DataTypeMap::new(
411 DataType::Binary,
412 PythonType::Bytes,
413 SqlType::BINARY,
414 )),
415 SqlType::BOOLEAN => Ok(DataTypeMap::new(
416 DataType::Boolean,
417 PythonType::Bool,
418 SqlType::BOOLEAN,
419 )),
420 SqlType::CHAR => Ok(DataTypeMap::new(
421 DataType::UInt8,
422 PythonType::Int,
423 SqlType::CHAR,
424 )),
425 SqlType::COLUMN_LIST => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
426 SqlType::CURSOR => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
427 SqlType::DATE => Ok(DataTypeMap::new(
428 DataType::Date64,
429 PythonType::Datetime,
430 SqlType::DATE,
431 )),
432 SqlType::DECIMAL => Ok(DataTypeMap::new(
433 DataType::Decimal128(1, 1),
434 PythonType::Float,
435 SqlType::DECIMAL,
436 )),
437 SqlType::DISTINCT => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
438 SqlType::DOUBLE => Ok(DataTypeMap::new(
439 DataType::Decimal256(1, 1),
440 PythonType::Float,
441 SqlType::DOUBLE,
442 )),
443 SqlType::DYNAMIC_STAR => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
444 SqlType::FLOAT => Ok(DataTypeMap::new(
445 DataType::Decimal128(1, 1),
446 PythonType::Float,
447 SqlType::FLOAT,
448 )),
449 SqlType::GEOMETRY => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
450 SqlType::INTEGER => Ok(DataTypeMap::new(
451 DataType::Int8,
452 PythonType::Int,
453 SqlType::INTEGER,
454 )),
455 SqlType::INTERVAL => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
456 SqlType::INTERVAL_DAY => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
457 SqlType::INTERVAL_DAY_HOUR => {
458 Err(PyNotImplementedError::new_err(format!("{sql_type:?}")))
459 }
460 SqlType::INTERVAL_DAY_MINUTE => {
461 Err(PyNotImplementedError::new_err(format!("{sql_type:?}")))
462 }
463 SqlType::INTERVAL_DAY_SECOND => {
464 Err(PyNotImplementedError::new_err(format!("{sql_type:?}")))
465 }
466 SqlType::INTERVAL_HOUR => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
467 SqlType::INTERVAL_HOUR_MINUTE => {
468 Err(PyNotImplementedError::new_err(format!("{sql_type:?}")))
469 }
470 SqlType::INTERVAL_HOUR_SECOND => {
471 Err(PyNotImplementedError::new_err(format!("{sql_type:?}")))
472 }
473 SqlType::INTERVAL_MINUTE => {
474 Err(PyNotImplementedError::new_err(format!("{sql_type:?}")))
475 }
476 SqlType::INTERVAL_MINUTE_SECOND => {
477 Err(PyNotImplementedError::new_err(format!("{sql_type:?}")))
478 }
479 SqlType::INTERVAL_MONTH => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
480 SqlType::INTERVAL_SECOND => {
481 Err(PyNotImplementedError::new_err(format!("{sql_type:?}")))
482 }
483 SqlType::INTERVAL_YEAR => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
484 SqlType::INTERVAL_YEAR_MONTH => {
485 Err(PyNotImplementedError::new_err(format!("{sql_type:?}")))
486 }
487 SqlType::MAP => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
488 SqlType::MULTISET => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
489 SqlType::NULL => Ok(DataTypeMap::new(
490 DataType::Null,
491 PythonType::None,
492 SqlType::NULL,
493 )),
494 SqlType::OTHER => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
495 SqlType::REAL => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
496 SqlType::ROW => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
497 SqlType::SARG => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
498 SqlType::SMALLINT => Ok(DataTypeMap::new(
499 DataType::Int16,
500 PythonType::Int,
501 SqlType::SMALLINT,
502 )),
503 SqlType::STRUCTURED => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
504 SqlType::SYMBOL => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
505 SqlType::TIME => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
506 SqlType::TIME_WITH_LOCAL_TIME_ZONE => {
507 Err(PyNotImplementedError::new_err(format!("{sql_type:?}")))
508 }
509 SqlType::TIMESTAMP => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
510 SqlType::TIMESTAMP_WITH_LOCAL_TIME_ZONE => {
511 Err(PyNotImplementedError::new_err(format!("{sql_type:?}")))
512 }
513 SqlType::TINYINT => Ok(DataTypeMap::new(
514 DataType::Int8,
515 PythonType::Int,
516 SqlType::TINYINT,
517 )),
518 SqlType::UNKNOWN => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))),
519 SqlType::VARBINARY => Ok(DataTypeMap::new(
520 DataType::LargeBinary,
521 PythonType::Bytes,
522 SqlType::VARBINARY,
523 )),
524 SqlType::VARCHAR => Ok(DataTypeMap::new(
525 DataType::Utf8,
526 PythonType::Str,
527 SqlType::VARCHAR,
528 )),
529 }
530 }
531
532 #[pyo3(name = "friendly_arrow_type_name")]
536 pub fn friendly_arrow_type_name(&self) -> PyResult<&str> {
537 Ok(match &self.arrow_type.data_type {
538 DataType::Null => "Null",
539 DataType::Boolean => "Boolean",
540 DataType::Int8 => "Int8",
541 DataType::Int16 => "Int16",
542 DataType::Int32 => "Int32",
543 DataType::Int64 => "Int64",
544 DataType::UInt8 => "UInt8",
545 DataType::UInt16 => "UInt16",
546 DataType::UInt32 => "UInt32",
547 DataType::UInt64 => "UInt64",
548 DataType::Float16 => "Float16",
549 DataType::Float32 => "Float32",
550 DataType::Float64 => "Float64",
551 DataType::Timestamp(_, _) => "Timestamp",
552 DataType::Date32 => "Date32",
553 DataType::Date64 => "Date64",
554 DataType::Time32(_) => "Time32",
555 DataType::Time64(_) => "Time64",
556 DataType::Duration(_) => "Duration",
557 DataType::Interval(_) => "Interval",
558 DataType::Binary => "Binary",
559 DataType::FixedSizeBinary(_) => "FixedSizeBinary",
560 DataType::LargeBinary => "LargeBinary",
561 DataType::Utf8 => "Utf8",
562 DataType::LargeUtf8 => "LargeUtf8",
563 DataType::List(_) => "List",
564 DataType::FixedSizeList(_, _) => "FixedSizeList",
565 DataType::LargeList(_) => "LargeList",
566 DataType::Struct(_) => "Struct",
567 DataType::Union(_, _) => "Union",
568 DataType::Dictionary(_, _) => "Dictionary",
569 DataType::Decimal32(_, _) => "Decimal32",
570 DataType::Decimal64(_, _) => "Decimal64",
571 DataType::Decimal128(_, _) => "Decimal128",
572 DataType::Decimal256(_, _) => "Decimal256",
573 DataType::Map(_, _) => "Map",
574 DataType::RunEndEncoded(_, _) => "RunEndEncoded",
575 DataType::BinaryView => "BinaryView",
576 DataType::Utf8View => "Utf8View",
577 DataType::ListView(_) => "ListView",
578 DataType::LargeListView(_) => "LargeListView",
579 })
580 }
581}
582
583#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
587#[pyclass(frozen, name = "DataType", module = "datafusion.common")]
588pub struct PyDataType {
589 pub data_type: DataType,
590}
591
592impl PyDataType {
593 pub fn py_map_from_arrow_type_str(arrow_str_type: String) -> PyResult<PyDataType> {
597 let arrow_str_type = match arrow_str_type.find('[') {
599 Some(index) => arrow_str_type[0..index].to_string(),
600 None => arrow_str_type, };
602
603 let arrow_dtype = match arrow_str_type.to_lowercase().as_str() {
604 "bool" => Ok(DataType::Boolean),
605 "boolean" => Ok(DataType::Boolean),
606 "uint8" => Ok(DataType::UInt8),
607 "uint16" => Ok(DataType::UInt16),
608 "uint32" => Ok(DataType::UInt32),
609 "uint64" => Ok(DataType::UInt64),
610 "int8" => Ok(DataType::Int8),
611 "int16" => Ok(DataType::Int16),
612 "int32" => Ok(DataType::Int32),
613 "int64" => Ok(DataType::Int64),
614 "float" => Ok(DataType::Float32),
615 "double" => Ok(DataType::Float64),
616 "float16" => Ok(DataType::Float16),
617 "float32" => Ok(DataType::Float32),
618 "float64" => Ok(DataType::Float64),
619 "datetime64" => Ok(DataType::Date64),
620 "object" => Ok(DataType::Utf8),
621 _ => Err(PyValueError::new_err(format!(
622 "Unable to determine Arrow Data Type from Arrow String type: {arrow_str_type:?}"
623 ))),
624 };
625 Ok(PyDataType {
626 data_type: arrow_dtype?,
627 })
628 }
629}
630
631impl From<PyDataType> for DataType {
632 fn from(data_type: PyDataType) -> DataType {
633 data_type.data_type
634 }
635}
636
637impl From<DataType> for PyDataType {
638 fn from(data_type: DataType) -> PyDataType {
639 PyDataType { data_type }
640 }
641}
642
643#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
645#[pyclass(frozen, eq, eq_int, name = "PythonType", module = "datafusion.common")]
646pub enum PythonType {
647 Array,
648 Bool,
649 Bytes,
650 Datetime,
651 Float,
652 Int,
653 List,
654 None,
655 Object,
656 Str,
657}
658
659#[allow(non_camel_case_types)]
663#[allow(clippy::upper_case_acronyms)]
664#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
665#[pyclass(frozen, eq, eq_int, name = "SqlType", module = "datafusion.common")]
666pub enum SqlType {
667 ANY,
668 ARRAY,
669 BIGINT,
670 BINARY,
671 BOOLEAN,
672 CHAR,
673 COLUMN_LIST,
674 CURSOR,
675 DATE,
676 DECIMAL,
677 DISTINCT,
678 DOUBLE,
679 DYNAMIC_STAR,
680 FLOAT,
681 GEOMETRY,
682 INTEGER,
683 INTERVAL,
684 INTERVAL_DAY,
685 INTERVAL_DAY_HOUR,
686 INTERVAL_DAY_MINUTE,
687 INTERVAL_DAY_SECOND,
688 INTERVAL_HOUR,
689 INTERVAL_HOUR_MINUTE,
690 INTERVAL_HOUR_SECOND,
691 INTERVAL_MINUTE,
692 INTERVAL_MINUTE_SECOND,
693 INTERVAL_MONTH,
694 INTERVAL_SECOND,
695 INTERVAL_YEAR,
696 INTERVAL_YEAR_MONTH,
697 MAP,
698 MULTISET,
699 NULL,
700 OTHER,
701 REAL,
702 ROW,
703 SARG,
704 SMALLINT,
705 STRUCTURED,
706 SYMBOL,
707 TIME,
708 TIME_WITH_LOCAL_TIME_ZONE,
709 TIMESTAMP,
710 TIMESTAMP_WITH_LOCAL_TIME_ZONE,
711 TINYINT,
712 UNKNOWN,
713 VARBINARY,
714 VARCHAR,
715}
716
717#[allow(non_camel_case_types)]
721#[allow(clippy::upper_case_acronyms)]
722#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
723#[pyclass(
724 frozen,
725 eq,
726 eq_int,
727 name = "NullTreatment",
728 module = "datafusion.common"
729)]
730pub enum NullTreatment {
731 IGNORE_NULLS,
732 RESPECT_NULLS,
733}
734
735impl From<NullTreatment> for DFNullTreatment {
736 fn from(null_treatment: NullTreatment) -> DFNullTreatment {
737 match null_treatment {
738 NullTreatment::IGNORE_NULLS => DFNullTreatment::IgnoreNulls,
739 NullTreatment::RESPECT_NULLS => DFNullTreatment::RespectNulls,
740 }
741 }
742}
743
744impl From<DFNullTreatment> for NullTreatment {
745 fn from(null_treatment: DFNullTreatment) -> NullTreatment {
746 match null_treatment {
747 DFNullTreatment::IgnoreNulls => NullTreatment::IGNORE_NULLS,
748 DFNullTreatment::RespectNulls => NullTreatment::RESPECT_NULLS,
749 }
750 }
751}