sdf_metadata/metadata/metadata/
sdf_arrow_row.rs1use std::collections::HashSet;
2
3use anyhow::Result;
4use wit_encoder::{Field, Record};
5
6use sdf_common::render::wit_name_case;
7
8use crate::{
9 util::config_error::{ConfigError, INDENT},
10 wit::metadata::SdfArrowRow,
11};
12
13use super::sdf_type::SdfTypeValidationError;
14
/// Problems that `SdfArrowRow::validate` can report about a row's columns.
///
/// Variant order is significant for the derived `Ord`/`PartialOrd`:
/// `EmptyColumnName` sorts before every `DuplicateColumnName`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum SdfArrowRowValidationError {
    /// A column was declared with an empty name.
    EmptyColumnName,
    /// A column name was used more than once; carries the repeated name.
    DuplicateColumnName(String),
}
20
21impl From<Vec<SdfArrowRowValidationError>> for SdfTypeValidationError {
22 fn from(errs: Vec<SdfArrowRowValidationError>) -> Self {
23 Self::SdfArrowRow(errs)
24 }
25}
26
27#[allow(clippy::derivable_impls)]
28impl Default for SdfArrowRow {
29 fn default() -> Self {
30 SdfArrowRow {
31 ttl: None,
32 columns: vec![],
33 }
34 }
35}
36
37impl ConfigError for SdfArrowRowValidationError {
38 fn readable(&self, indents: usize) -> String {
39 let indent = INDENT.repeat(indents);
40
41 match self {
42 Self::EmptyColumnName => {
43 format!("{}Column name cannot be empty\n", indent)
44 }
45 Self::DuplicateColumnName(name) => {
46 format!(
47 "{}Column name `{}` is duplicated. Column names must be unique\n",
48 indent, name
49 )
50 }
51 }
52 }
53}
54
55impl SdfArrowRow {
56 pub fn validate(&self) -> Result<(), Vec<SdfArrowRowValidationError>> {
57 let mut errors = vec![];
58 let mut column_names = HashSet::new();
59
60 for column in &self.columns {
61 if column.name.is_empty() {
62 errors.push(SdfArrowRowValidationError::EmptyColumnName);
63 }
64
65 if column_names.contains(&column.name) {
66 errors.push(SdfArrowRowValidationError::DuplicateColumnName(
67 column.name.clone(),
68 ));
69 } else {
70 column_names.insert(&column.name);
71 }
72 }
73
74 if errors.is_empty() {
75 Ok(())
76 } else {
77 Err(errors)
78 }
79 }
80
81 pub fn wit_record(&self) -> Record {
82 let fields = self.columns.iter().map(|field| {
83 let name = wit_name_case(&field.name);
84
85 let ty = field.type_.wit_type();
86
87 Field::new(name, ty)
88 });
89
90 Record::new(fields)
91 }
92}
93
#[cfg(test)]
mod test {
    use crate::{
        metadata::metadata::sdf_arrow_row::SdfArrowRowValidationError,
        util::config_error::ConfigError,
        wit::metadata::{ArrowColumnKind, SdfArrowColumn, SdfArrowRow},
    };

    /// Builds an `S32` column with the given name.
    fn s32_column(name: &str) -> SdfArrowColumn {
        SdfArrowColumn {
            name: name.to_string(),
            type_: ArrowColumnKind::S32,
        }
    }

    /// Builds a row holding `columns`, with every other field at its default.
    fn row_of(columns: Vec<SdfArrowColumn>) -> SdfArrowRow {
        SdfArrowRow {
            columns,
            ..Default::default()
        }
    }

    #[test]
    fn test_validate_accepts_valid_arrow_row() {
        let row = row_of(vec![s32_column("number")]);

        row.validate().expect("should validate");
    }

    #[test]
    fn test_validate_rejects_invalid_arrow_row_column_name() {
        let row = row_of(vec![s32_column("")]);

        let errors = row
            .validate()
            .expect_err("should error empty arrow-row column");

        assert!(errors.contains(&SdfArrowRowValidationError::EmptyColumnName));
        assert_eq!(errors[0].readable(0), "Column name cannot be empty\n");
    }

    #[test]
    fn test_validate_rejects_duplicate_arrow_row_column_names() {
        // Two columns sharing one name: the second must be flagged.
        let row = row_of(vec![s32_column("my-column"), s32_column("my-column")]);

        let errors = row
            .validate()
            .expect_err("should error duplicate column names");

        let expected = SdfArrowRowValidationError::DuplicateColumnName("my-column".to_string());
        assert!(errors.contains(&expected));
        assert_eq!(
            errors[0].readable(0),
            "Column name `my-column` is duplicated. Column names must be unique\n"
        );
    }
}
163}