llkv_executor/insert/
value_coercion.rs

1//! Helper functions for value coercion and data preparation used during inserts.
2
3use arrow::array::{
4    ArrayRef, BooleanBuilder, Date32Builder, Decimal128Array, Float64Builder, Int64Builder,
5    IntervalMonthDayNanoArray, StringBuilder,
6};
7use arrow::datatypes::{DataType, FieldRef, IntervalUnit};
8use llkv_compute::date::parse_date32_literal;
9use llkv_compute::scalar::decimal::{
10    align_decimal_to_scale, decimal_from_f64, decimal_from_i64, decimal_truthy,
11    truncate_decimal_to_i64,
12};
13use llkv_compute::scalar::interval::interval_value_to_arrow;
14use llkv_plan::PlanValue;
15use llkv_result::{Error, Result};
16use std::sync::Arc;
17
18use crate::{ExecutorColumn, ExecutorSchema};
19
20/// Resolve the user-specified column list for an INSERT statement into indexes
21/// of the executor schema. If no columns were provided, return the identity order.
22pub fn resolve_insert_columns(columns: &[String], schema: &ExecutorSchema) -> Result<Vec<usize>> {
23    if columns.is_empty() {
24        return Ok((0..schema.columns.len()).collect());
25    }
26
27    let mut resolved = Vec::with_capacity(columns.len());
28    for column in columns {
29        let normalized = column.to_ascii_lowercase();
30        let index = schema.lookup.get(&normalized).ok_or_else(|| {
31            Error::InvalidArgumentError(format!(
32                "Binder Error: does not have a column named '{}'",
33                column
34            ))
35        })?;
36        resolved.push(*index);
37    }
38    Ok(resolved)
39}
40
41/// Coerce a `PlanValue` into the Arrow data type required by the executor column.
42pub fn normalize_insert_value_for_column(
43    column: &ExecutorColumn,
44    value: PlanValue,
45) -> Result<PlanValue> {
46    match (&column.data_type, value) {
47        (_, PlanValue::Null) => Ok(PlanValue::Null),
48        (DataType::Int64, PlanValue::Integer(v)) => Ok(PlanValue::Integer(v)),
49        (DataType::Int64, PlanValue::Float(v)) => Ok(PlanValue::Integer(v as i64)),
50        (DataType::Int64, PlanValue::Decimal(decimal)) => {
51            let coerced = truncate_decimal_to_i64(decimal).map_err(|err| {
52                Error::InvalidArgumentError(format!(
53                    "cannot insert decimal literal {} into INT column '{}': {err}",
54                    decimal, column.name
55                ))
56            })?;
57            Ok(PlanValue::Integer(coerced))
58        }
59        (DataType::Int64, other) => Err(Error::InvalidArgumentError(format!(
60            "cannot insert {other:?} into INT column '{}'",
61            column.name
62        ))),
63        (DataType::Boolean, PlanValue::Integer(v)) => {
64            Ok(PlanValue::Integer(if v != 0 { 1 } else { 0 }))
65        }
66        (DataType::Boolean, PlanValue::Float(v)) => {
67            Ok(PlanValue::Integer(if v != 0.0 { 1 } else { 0 }))
68        }
69        (DataType::Boolean, PlanValue::Decimal(decimal)) => {
70            Ok(PlanValue::Integer(if decimal_truthy(decimal) {
71                1
72            } else {
73                0
74            }))
75        }
76        (DataType::Boolean, PlanValue::String(s)) => {
77            let normalized = s.trim().to_ascii_lowercase();
78            let value = match normalized.as_str() {
79                "true" | "t" | "1" => 1,
80                "false" | "f" | "0" => 0,
81                _ => {
82                    return Err(Error::InvalidArgumentError(format!(
83                        "cannot insert string '{}' into BOOLEAN column '{}'",
84                        s, column.name
85                    )));
86                }
87            };
88            Ok(PlanValue::Integer(value))
89        }
90        (DataType::Boolean, PlanValue::Struct(_)) => Err(Error::InvalidArgumentError(format!(
91            "cannot insert struct into BOOLEAN column '{}'",
92            column.name
93        ))),
94        (DataType::Float64, PlanValue::Integer(v)) => Ok(PlanValue::Float(v as f64)),
95        (DataType::Float64, PlanValue::Float(v)) => Ok(PlanValue::Float(v)),
96        (DataType::Float64, PlanValue::Decimal(decimal)) => Ok(PlanValue::Float(decimal.to_f64())),
97        (DataType::Float64, other) => Err(Error::InvalidArgumentError(format!(
98            "cannot insert {other:?} into DOUBLE column '{}'",
99            column.name
100        ))),
101        (DataType::Utf8, PlanValue::Integer(v)) => Ok(PlanValue::String(v.to_string())),
102        (DataType::Utf8, PlanValue::Float(v)) => Ok(PlanValue::String(v.to_string())),
103        (DataType::Utf8, PlanValue::Decimal(decimal)) => Ok(PlanValue::String(decimal.to_string())),
104        (DataType::Utf8, PlanValue::String(s)) => Ok(PlanValue::String(s)),
105        (DataType::Utf8, PlanValue::Struct(_)) => Err(Error::InvalidArgumentError(format!(
106            "cannot insert struct into STRING column '{}'",
107            column.name
108        ))),
109        (DataType::Date32, PlanValue::Date32(days)) => Ok(PlanValue::Date32(days)),
110        (DataType::Date32, PlanValue::Integer(days)) => {
111            let casted = i32::try_from(days).map_err(|_| {
112                Error::InvalidArgumentError(format!(
113                    "integer literal out of range for DATE column '{}'",
114                    column.name
115                ))
116            })?;
117            Ok(PlanValue::Date32(casted))
118        }
119        (DataType::Date32, PlanValue::String(text)) => {
120            let days = parse_date32_literal(&text)?;
121            Ok(PlanValue::Date32(days))
122        }
123        (DataType::Date32, PlanValue::Decimal(_)) => Err(Error::InvalidArgumentError(format!(
124            "cannot insert decimal literal into DATE column '{}'",
125            column.name
126        ))),
127        (DataType::Date32, other) => Err(Error::InvalidArgumentError(format!(
128            "cannot insert {other:?} into DATE column '{}'",
129            column.name
130        ))),
131        (DataType::Struct(_), PlanValue::Struct(map)) => Ok(PlanValue::Struct(map)),
132        (DataType::Struct(_), other) => Err(Error::InvalidArgumentError(format!(
133            "expected struct value for struct column '{}', got {other:?}",
134            column.name
135        ))),
136        (DataType::Interval(IntervalUnit::MonthDayNano), PlanValue::Interval(interval)) => {
137            Ok(PlanValue::Interval(interval))
138        }
139        (DataType::Interval(IntervalUnit::MonthDayNano), other) => {
140            Err(Error::InvalidArgumentError(format!(
141                "cannot insert {other:?} into INTERVAL column '{}'",
142                column.name
143            )))
144        }
145        (DataType::Decimal128(precision, scale), PlanValue::Decimal(decimal)) => {
146            let aligned = align_decimal_to_scale(decimal, *precision, *scale).map_err(|err| {
147                Error::InvalidArgumentError(format!(
148                    "decimal literal {} incompatible with DECIMAL({}, {}) column '{}': {err}",
149                    decimal, precision, scale, column.name
150                ))
151            })?;
152            Ok(PlanValue::Decimal(aligned))
153        }
154        (DataType::Decimal128(precision, scale), PlanValue::Integer(value)) => {
155            let decimal = decimal_from_i64(value, *precision, *scale).map_err(|err| {
156                Error::InvalidArgumentError(format!(
157                    "integer literal {value} incompatible with DECIMAL({}, {}) column '{}': {err}",
158                    precision, scale, column.name
159                ))
160            })?;
161            Ok(PlanValue::Decimal(decimal))
162        }
163        (DataType::Decimal128(precision, scale), PlanValue::Float(value)) => {
164            let decimal = decimal_from_f64(value, *precision, *scale).map_err(|err| {
165                Error::InvalidArgumentError(format!(
166                    "float literal {value} incompatible with DECIMAL({}, {}) column '{}': {err}",
167                    precision, scale, column.name
168                ))
169            })?;
170            Ok(PlanValue::Decimal(decimal))
171        }
172        (DataType::Decimal128(_, _), other) => Err(Error::InvalidArgumentError(format!(
173            "cannot insert {other:?} into DECIMAL column '{}'",
174            column.name
175        ))),
176        (other_type, other_value) => Err(Error::InvalidArgumentError(format!(
177            "unsupported Arrow data type {:?} for INSERT value {:?} in column '{}'",
178            other_type, other_value, column.name
179        ))),
180    }
181}
182
183/// Build an Arrow array that matches the executor column's data type from the provided values.
184pub fn build_array_for_column(dtype: &DataType, values: &[PlanValue]) -> Result<ArrayRef> {
185    match dtype {
186        DataType::Int64 => {
187            let mut builder = Int64Builder::with_capacity(values.len());
188            for value in values {
189                match value {
190                    PlanValue::Null => builder.append_null(),
191                    PlanValue::Integer(v) => builder.append_value(*v),
192                    PlanValue::Float(v) => builder.append_value(*v as i64),
193                    PlanValue::Decimal(decimal) => {
194                        let coerced = truncate_decimal_to_i64(*decimal).map_err(|err| {
195                            Error::InvalidArgumentError(format!(
196                                "cannot insert decimal literal {} into INT column: {err}",
197                                decimal
198                            ))
199                        })?;
200                        builder.append_value(coerced);
201                    }
202                    PlanValue::Date32(days) => builder.append_value(i64::from(*days)),
203                    PlanValue::String(_) | PlanValue::Struct(_) | PlanValue::Interval(_) => {
204                        return Err(Error::InvalidArgumentError(
205                            "cannot insert non-integer into INT column".into(),
206                        ));
207                    }
208                }
209            }
210            Ok(Arc::new(builder.finish()))
211        }
212        DataType::Boolean => {
213            let mut builder = BooleanBuilder::with_capacity(values.len());
214            for value in values {
215                match value {
216                    PlanValue::Null => builder.append_null(),
217                    PlanValue::Integer(v) => builder.append_value(*v != 0),
218                    PlanValue::Float(v) => builder.append_value(*v != 0.0),
219                    PlanValue::Decimal(decimal) => {
220                        builder.append_value(decimal_truthy(*decimal));
221                    }
222                    PlanValue::Date32(days) => builder.append_value(*days != 0),
223                    PlanValue::String(s) => {
224                        let normalized = s.trim().to_ascii_lowercase();
225                        match normalized.as_str() {
226                            "true" | "t" | "1" => builder.append_value(true),
227                            "false" | "f" | "0" => builder.append_value(false),
228                            _ => {
229                                return Err(Error::InvalidArgumentError(format!(
230                                    "cannot insert string '{}' into BOOLEAN column",
231                                    s
232                                )));
233                            }
234                        }
235                    }
236                    PlanValue::Struct(_) | PlanValue::Interval(_) => {
237                        return Err(Error::InvalidArgumentError(
238                            "cannot insert struct into BOOLEAN column".into(),
239                        ));
240                    }
241                }
242            }
243            Ok(Arc::new(builder.finish()))
244        }
245        DataType::Float64 => {
246            let mut builder = Float64Builder::with_capacity(values.len());
247            for value in values {
248                match value {
249                    PlanValue::Null => builder.append_null(),
250                    PlanValue::Integer(v) => builder.append_value(*v as f64),
251                    PlanValue::Float(v) => builder.append_value(*v),
252                    PlanValue::Decimal(decimal) => {
253                        builder.append_value(decimal.to_f64());
254                    }
255                    PlanValue::Date32(days) => builder.append_value(f64::from(*days)),
256                    PlanValue::String(_) | PlanValue::Struct(_) | PlanValue::Interval(_) => {
257                        return Err(Error::InvalidArgumentError(
258                            "cannot insert non-numeric into DOUBLE column".into(),
259                        ));
260                    }
261                }
262            }
263            Ok(Arc::new(builder.finish()))
264        }
265        DataType::Utf8 => {
266            let mut builder = StringBuilder::with_capacity(values.len(), values.len() * 8);
267            for value in values {
268                match value {
269                    PlanValue::Null => builder.append_null(),
270                    PlanValue::Integer(v) => builder.append_value(v.to_string()),
271                    PlanValue::Float(v) => builder.append_value(v.to_string()),
272                    PlanValue::Decimal(decimal) => {
273                        builder.append_value(decimal.to_string());
274                    }
275                    PlanValue::Date32(days) => builder.append_value(days.to_string()),
276                    PlanValue::String(s) => builder.append_value(s),
277                    PlanValue::Struct(_) | PlanValue::Interval(_) => {
278                        return Err(Error::InvalidArgumentError(
279                            "cannot insert struct into STRING column".into(),
280                        ));
281                    }
282                }
283            }
284            Ok(Arc::new(builder.finish()))
285        }
286        DataType::Date32 => {
287            let mut builder = Date32Builder::with_capacity(values.len());
288            for value in values {
289                match value {
290                    PlanValue::Null => builder.append_null(),
291                    PlanValue::Integer(days) => {
292                        let casted = i32::try_from(*days).map_err(|_| {
293                            Error::InvalidArgumentError(
294                                "integer literal out of range for DATE column".into(),
295                            )
296                        })?;
297                        builder.append_value(casted);
298                    }
299                    PlanValue::Date32(days) => builder.append_value(*days),
300                    PlanValue::Decimal(_) => {
301                        return Err(Error::InvalidArgumentError(
302                            "cannot insert decimal literal into DATE column".into(),
303                        ));
304                    }
305                    PlanValue::Float(_) | PlanValue::Struct(_) | PlanValue::Interval(_) => {
306                        return Err(Error::InvalidArgumentError(
307                            "cannot insert non-date value into DATE column".into(),
308                        ));
309                    }
310                    PlanValue::String(text) => {
311                        let days = parse_date32_literal(text)?;
312                        builder.append_value(days);
313                    }
314                }
315            }
316            Ok(Arc::new(builder.finish()))
317        }
318        DataType::Struct(fields) => {
319            use arrow::array::StructArray;
320            let mut field_arrays: Vec<(FieldRef, ArrayRef)> = Vec::with_capacity(fields.len());
321
322            for field in fields.iter() {
323                let field_name = field.name();
324                let field_type = field.data_type();
325                let mut field_values = Vec::with_capacity(values.len());
326
327                for value in values {
328                    match value {
329                        PlanValue::Null => field_values.push(PlanValue::Null),
330                        PlanValue::Struct(map) => {
331                            let field_value =
332                                map.get(field_name).cloned().unwrap_or(PlanValue::Null);
333                            field_values.push(field_value);
334                        }
335                        _ => {
336                            return Err(Error::InvalidArgumentError(format!(
337                                "expected struct value for struct column, got {:?}",
338                                value
339                            )));
340                        }
341                    }
342                }
343
344                let field_array = build_array_for_column(field_type, &field_values)?;
345                field_arrays.push((Arc::clone(field), field_array));
346            }
347
348            Ok(Arc::new(StructArray::from(field_arrays)))
349        }
350        DataType::Decimal128(precision, scale) => {
351            let mut raw_values: Vec<Option<i128>> = Vec::with_capacity(values.len());
352            for value in values {
353                let entry = match value {
354                    PlanValue::Null => None,
355                    PlanValue::Decimal(decimal) => {
356                        let aligned = align_decimal_to_scale(*decimal, *precision, *scale)
357                            .map_err(|err| {
358                                Error::InvalidArgumentError(format!(
359                                    "decimal literal {} incompatible with DECIMAL({}, {}): {err}",
360                                    decimal, precision, scale
361                                ))
362                            })?;
363                        Some(aligned.raw_value())
364                    }
365                    PlanValue::Integer(value) => {
366                        let decimal = decimal_from_i64(*value, *precision, *scale).map_err(|err| {
367                            Error::InvalidArgumentError(format!(
368                                "integer literal {value} incompatible with DECIMAL({}, {}): {err}",
369                                precision, scale
370                            ))
371                        })?;
372                        Some(decimal.raw_value())
373                    }
374                    PlanValue::Float(value) => {
375                        let decimal = decimal_from_f64(*value, *precision, *scale).map_err(|err| {
376                            Error::InvalidArgumentError(format!(
377                                "float literal {value} incompatible with DECIMAL({}, {}): {err}",
378                                precision, scale
379                            ))
380                        })?;
381                        Some(decimal.raw_value())
382                    }
383                    _ => {
384                        return Err(Error::InvalidArgumentError(
385                            "cannot insert non-decimal value into DECIMAL column".into(),
386                        ));
387                    }
388                };
389                raw_values.push(entry);
390            }
391
392            let array = Decimal128Array::from_iter(raw_values.into_iter())
393                .with_precision_and_scale(*precision, *scale)
394                .map_err(|err| {
395                    Error::InvalidArgumentError(format!("failed to build Decimal128 array: {err}"))
396                })?;
397            Ok(Arc::new(array) as ArrayRef)
398        }
399        DataType::Interval(IntervalUnit::MonthDayNano) => {
400            let mut converted: Vec<Option<_>> = Vec::with_capacity(values.len());
401            for value in values {
402                match value {
403                    PlanValue::Null => converted.push(None),
404                    PlanValue::Interval(interval) => {
405                        converted.push(Some(interval_value_to_arrow(*interval)))
406                    }
407                    other => {
408                        return Err(Error::InvalidArgumentError(format!(
409                            "cannot insert {other:?} into INTERVAL column"
410                        )));
411                    }
412                }
413            }
414            Ok(Arc::new(IntervalMonthDayNanoArray::from(converted)) as ArrayRef)
415        }
416        other => Err(Error::InvalidArgumentError(format!(
417            "unsupported Arrow data type for INSERT: {other:?}"
418        ))),
419    }
420}