// spark_ddl_parser/types.rs

//! Type definitions for the DDL schema parser.
//!
//! Represents PySpark schema structures parsed from DDL strings.
4
5use std::fmt;
6
7#[cfg(feature = "serde")]
8use serde::{Deserialize, Serialize};
9
10/// Data type for a schema field.
11#[derive(Clone, Debug, PartialEq, Eq)]
12#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
13pub enum DataType {
14    /// Simple types like string, int, long, double, etc.
15    Simple {
16        /// Canonical type name (e.g. "string", "long", "integer").
17        type_name: String,
18    },
19    /// Decimal type with precision and scale.
20    Decimal {
21        /// Precision (default 10).
22        precision: u32,
23        /// Scale (default 0).
24        scale: u32,
25    },
26    /// Array type with element type.
27    Array {
28        /// Element type.
29        element_type: Box<DataType>,
30    },
31    /// Map type with key and value types.
32    Map {
33        /// Key type.
34        key_type: Box<DataType>,
35        /// Value type.
36        value_type: Box<DataType>,
37    },
38    /// Struct type containing fields.
39    Struct(StructType),
40}
41
42impl DataType {
43    /// Returns the type name string (e.g. "struct", "array", "long").
44    pub fn type_name(&self) -> &str {
45        match self {
46            DataType::Simple { type_name } => type_name,
47            DataType::Decimal { .. } => "decimal",
48            DataType::Array { .. } => "array",
49            DataType::Map { .. } => "map",
50            DataType::Struct(_) => "struct",
51        }
52    }
53}
54
55/// A field in a struct.
56#[derive(Clone, Debug, PartialEq, Eq)]
57#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
58pub struct StructField {
59    /// Field name.
60    pub name: String,
61    /// Field data type.
62    pub data_type: DataType,
63    /// Whether the field is nullable (default true, PySpark behavior).
64    pub nullable: bool,
65}
66
67/// A struct type containing a list of fields.
68#[derive(Clone, Debug, PartialEq, Eq)]
69#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
70pub struct StructType {
71    /// Always "struct".
72    pub type_name: String,
73    /// List of struct fields.
74    pub fields: Vec<StructField>,
75}
76
77impl StructType {
78    /// Creates a new struct type with the given fields.
79    pub fn new(fields: Vec<StructField>) -> Self {
80        Self {
81            type_name: "struct".to_string(),
82            fields,
83        }
84    }
85}
86
87impl fmt::Display for DataType {
88    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
89        match self {
90            DataType::Simple { type_name } => write!(f, "{}", type_name),
91            DataType::Decimal { precision, scale } => write!(f, "decimal({},{})", precision, scale),
92            DataType::Array { element_type } => write!(f, "array<{}>", element_type),
93            DataType::Map {
94                key_type,
95                value_type,
96            } => {
97                write!(f, "map<{},{}>", key_type, value_type)
98            }
99            DataType::Struct(_s) => write!(f, "struct<...>"),
100        }
101    }
102}
103
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a simple (leaf) data type with the given type name.
    fn simple(name: &str) -> DataType {
        DataType::Simple {
            type_name: name.to_string(),
        }
    }

    /// `type_name` returns the stored name for simple types and the fixed
    /// keyword for every other variant.
    #[test]
    fn test_data_type_type_name() {
        assert_eq!(simple("long").type_name(), "long");
        assert_eq!(
            DataType::Decimal {
                precision: 10,
                scale: 2
            }
            .type_name(),
            "decimal"
        );
        assert_eq!(
            DataType::Array {
                element_type: Box::new(simple("string"))
            }
            .type_name(),
            "array"
        );
        assert_eq!(
            DataType::Map {
                key_type: Box::new(simple("string")),
                value_type: Box::new(simple("long"))
            }
            .type_name(),
            "map"
        );
        assert_eq!(
            DataType::Struct(StructType::new(vec![])).type_name(),
            "struct"
        );
        assert_eq!(StructType::new(vec![]).type_name, "struct");
    }

    /// `Display` output for the variants that do not contain a struct.
    #[test]
    fn test_data_type_display_non_struct() {
        assert_eq!(simple("long").to_string(), "long");
        assert_eq!(
            DataType::Decimal {
                precision: 10,
                scale: 2
            }
            .to_string(),
            "decimal(10,2)"
        );
        assert_eq!(
            DataType::Array {
                element_type: Box::new(simple("string"))
            }
            .to_string(),
            "array<string>"
        );
        assert_eq!(
            DataType::Map {
                key_type: Box::new(simple("string")),
                value_type: Box::new(simple("long"))
            }
            .to_string(),
            "map<string,long>"
        );
    }
}