1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
use crate::prelude::*;
pub fn build_record_batch(
rows: &[HashMap<String, Value>],
schema: Arc<Schema>
) -> ArrowResult<RecordBatch> {
let mut builders: Vec<Box<dyn ArrayBuilder>> = Vec::new();
// Simplified builder creation - only use Int64, UInt64, Float64, or StringBuilder
for field in schema.fields() {
let builder: Box<dyn ArrayBuilder> = match field.data_type() {
ArrowDataType::Int64 => Box::new(Int64Builder::new()),
ArrowDataType::UInt64 => Box::new(UInt64Builder::new()),
ArrowDataType::Float64 => Box::new(Float64Builder::new()),
_ => Box::new(StringBuilder::new()), // Everything else becomes string
};
builders.push(builder);
}
for row in rows {
for (i, field) in schema.fields().iter().enumerate() {
let key = field.name();
let value = row.get(key);
match field.data_type() {
ArrowDataType::Int64 => {
let builder = builders[i]
.as_any_mut()
.downcast_mut::<Int64Builder>()
.expect("Expected Int64Builder");
match value {
Some(Value::Number(n)) => {
// Handle all possible number scenarios
if let Some(i) = n.as_i64() {
builder.append_value(i);
} else {
builder.append_null();
}
},
// Everything non-number becomes null
_ => builder.append_null(),
}
},
ArrowDataType::UInt64 => {
let builder = builders[i]
.as_any_mut()
.downcast_mut::<UInt64Builder>()
.expect("Expected UInt64Builder");
match value {
Some(Value::Number(n)) => {
// Only accept valid unsigned integers
if let Some(u) = n.as_u64() {
builder.append_value(u);
} else {
builder.append_null();
}
},
_ => builder.append_null(),
}
},
ArrowDataType::Float64 => {
let builder = builders[i]
.as_any_mut()
.downcast_mut::<Float64Builder>()
.expect("Expected Float64Builder");
match value {
Some(Value::Number(n)) => {
// Handle all possible float scenarios
if let Some(f) = n.as_f64() {
builder.append_value(f);
} else {
builder.append_null();
}
},
_ => builder.append_null(),
}
},
_ => {
// Default string handling - handles ALL other cases
let builder = builders[i]
.as_any_mut()
.downcast_mut::<StringBuilder>()
.expect("Expected StringBuilder");
match value {
Some(v) => {
// Comprehensive string conversion for ANY JSON value
let string_val = match v {
Value::Null => "null".to_string(),
Value::Bool(b) => b.to_string(),
Value::Number(n) => {
if n.is_f64() {
// Handle special float values
if let Some(f) = n.as_f64() {
if f.is_nan() {
"NaN".to_string()
} else if f.is_infinite() {
if f.is_sign_positive() {
"Infinity".to_string()
} else {
"-Infinity".to_string()
}
} else {
f.to_string()
}
} else {
n.to_string()
}
} else {
n.to_string()
}
},
Value::String(s) => {
// Handle potentially invalid UTF-8 or special characters
s.chars()
.map(|c| if c.is_control() {
format!("\\u{:04x}", c as u32)
} else {
c.to_string()
})
.collect()
},
Value::Array(arr) => {
// Safely handle nested arrays
serde_json::to_string(arr)
.unwrap_or_else(|_| "[]".to_string())
},
Value::Object(obj) => {
// Safely handle nested objects
serde_json::to_string(obj)
.unwrap_or_else(|_| "{}".to_string())
},
};
// Ensure the string is valid UTF-8
builder.append_value(&string_val);
},
None => builder.append_null(),
}
},
}
}
}
let arrays: Vec<ArrayRef> = builders.into_iter().map(|mut b| b.finish()).collect();
RecordBatch::try_new(schema.clone(), arrays)
}