llkv_executor/insert/
value_coercion.rs

1//! Helper functions for value coercion and data preparation used during inserts.
2
3use crate::utils::date::parse_date32_literal;
4use arrow::array::{
5    ArrayRef, BooleanBuilder, Date32Builder, Float64Builder, Int64Builder, StringBuilder,
6};
7use arrow::datatypes::{DataType, FieldRef};
8use llkv_plan::PlanValue;
9use llkv_result::{Error, Result};
10use std::sync::Arc;
11
12use crate::{ExecutorColumn, ExecutorSchema};
13
14/// Resolve the user-specified column list for an INSERT statement into indexes
15/// of the executor schema. If no columns were provided, return the identity order.
16pub fn resolve_insert_columns(columns: &[String], schema: &ExecutorSchema) -> Result<Vec<usize>> {
17    if columns.is_empty() {
18        return Ok((0..schema.columns.len()).collect());
19    }
20
21    let mut resolved = Vec::with_capacity(columns.len());
22    for column in columns {
23        let normalized = column.to_ascii_lowercase();
24        let index = schema.lookup.get(&normalized).ok_or_else(|| {
25            Error::InvalidArgumentError(format!(
26                "Binder Error: does not have a column named '{}'",
27                column
28            ))
29        })?;
30        resolved.push(*index);
31    }
32    Ok(resolved)
33}
34
35/// Coerce a `PlanValue` into the Arrow data type required by the executor column.
36pub fn normalize_insert_value_for_column(
37    column: &ExecutorColumn,
38    value: PlanValue,
39) -> Result<PlanValue> {
40    match (&column.data_type, value) {
41        (_, PlanValue::Null) => Ok(PlanValue::Null),
42        (DataType::Int64, PlanValue::Integer(v)) => Ok(PlanValue::Integer(v)),
43        (DataType::Int64, PlanValue::Float(v)) => Ok(PlanValue::Integer(v as i64)),
44        (DataType::Int64, other) => Err(Error::InvalidArgumentError(format!(
45            "cannot insert {other:?} into INT column '{}'",
46            column.name
47        ))),
48        (DataType::Boolean, PlanValue::Integer(v)) => {
49            Ok(PlanValue::Integer(if v != 0 { 1 } else { 0 }))
50        }
51        (DataType::Boolean, PlanValue::Float(v)) => {
52            Ok(PlanValue::Integer(if v != 0.0 { 1 } else { 0 }))
53        }
54        (DataType::Boolean, PlanValue::String(s)) => {
55            let normalized = s.trim().to_ascii_lowercase();
56            let value = match normalized.as_str() {
57                "true" | "t" | "1" => 1,
58                "false" | "f" | "0" => 0,
59                _ => {
60                    return Err(Error::InvalidArgumentError(format!(
61                        "cannot insert string '{}' into BOOLEAN column '{}'",
62                        s, column.name
63                    )));
64                }
65            };
66            Ok(PlanValue::Integer(value))
67        }
68        (DataType::Boolean, PlanValue::Struct(_)) => Err(Error::InvalidArgumentError(format!(
69            "cannot insert struct into BOOLEAN column '{}'",
70            column.name
71        ))),
72        (DataType::Float64, PlanValue::Integer(v)) => Ok(PlanValue::Float(v as f64)),
73        (DataType::Float64, PlanValue::Float(v)) => Ok(PlanValue::Float(v)),
74        (DataType::Float64, other) => Err(Error::InvalidArgumentError(format!(
75            "cannot insert {other:?} into DOUBLE column '{}'",
76            column.name
77        ))),
78        (DataType::Utf8, PlanValue::Integer(v)) => Ok(PlanValue::String(v.to_string())),
79        (DataType::Utf8, PlanValue::Float(v)) => Ok(PlanValue::String(v.to_string())),
80        (DataType::Utf8, PlanValue::String(s)) => Ok(PlanValue::String(s)),
81        (DataType::Utf8, PlanValue::Struct(_)) => Err(Error::InvalidArgumentError(format!(
82            "cannot insert struct into STRING column '{}'",
83            column.name
84        ))),
85        (DataType::Date32, PlanValue::Integer(days)) => {
86            let casted = i32::try_from(days).map_err(|_| {
87                Error::InvalidArgumentError(format!(
88                    "integer literal out of range for DATE column '{}'",
89                    column.name
90                ))
91            })?;
92            Ok(PlanValue::Integer(casted as i64))
93        }
94        (DataType::Date32, PlanValue::String(text)) => {
95            let days = parse_date32_literal(&text)?;
96            Ok(PlanValue::Integer(days as i64))
97        }
98        (DataType::Date32, other) => Err(Error::InvalidArgumentError(format!(
99            "cannot insert {other:?} into DATE column '{}'",
100            column.name
101        ))),
102        (DataType::Struct(_), PlanValue::Struct(map)) => Ok(PlanValue::Struct(map)),
103        (DataType::Struct(_), other) => Err(Error::InvalidArgumentError(format!(
104            "expected struct value for struct column '{}', got {other:?}",
105            column.name
106        ))),
107        (other_type, other_value) => Err(Error::InvalidArgumentError(format!(
108            "unsupported Arrow data type {:?} for INSERT value {:?} in column '{}'",
109            other_type, other_value, column.name
110        ))),
111    }
112}
113
114/// Build an Arrow array that matches the executor column's data type from the provided values.
115pub fn build_array_for_column(dtype: &DataType, values: &[PlanValue]) -> Result<ArrayRef> {
116    match dtype {
117        DataType::Int64 => {
118            let mut builder = Int64Builder::with_capacity(values.len());
119            for value in values {
120                match value {
121                    PlanValue::Null => builder.append_null(),
122                    PlanValue::Integer(v) => builder.append_value(*v),
123                    PlanValue::Float(v) => builder.append_value(*v as i64),
124                    PlanValue::String(_) | PlanValue::Struct(_) => {
125                        return Err(Error::InvalidArgumentError(
126                            "cannot insert non-integer into INT column".into(),
127                        ));
128                    }
129                }
130            }
131            Ok(Arc::new(builder.finish()))
132        }
133        DataType::Boolean => {
134            let mut builder = BooleanBuilder::with_capacity(values.len());
135            for value in values {
136                match value {
137                    PlanValue::Null => builder.append_null(),
138                    PlanValue::Integer(v) => builder.append_value(*v != 0),
139                    PlanValue::Float(v) => builder.append_value(*v != 0.0),
140                    PlanValue::String(s) => {
141                        let normalized = s.trim().to_ascii_lowercase();
142                        match normalized.as_str() {
143                            "true" | "t" | "1" => builder.append_value(true),
144                            "false" | "f" | "0" => builder.append_value(false),
145                            _ => {
146                                return Err(Error::InvalidArgumentError(format!(
147                                    "cannot insert string '{}' into BOOLEAN column",
148                                    s
149                                )));
150                            }
151                        }
152                    }
153                    PlanValue::Struct(_) => {
154                        return Err(Error::InvalidArgumentError(
155                            "cannot insert struct into BOOLEAN column".into(),
156                        ));
157                    }
158                }
159            }
160            Ok(Arc::new(builder.finish()))
161        }
162        DataType::Float64 => {
163            let mut builder = Float64Builder::with_capacity(values.len());
164            for value in values {
165                match value {
166                    PlanValue::Null => builder.append_null(),
167                    PlanValue::Integer(v) => builder.append_value(*v as f64),
168                    PlanValue::Float(v) => builder.append_value(*v),
169                    PlanValue::String(_) | PlanValue::Struct(_) => {
170                        return Err(Error::InvalidArgumentError(
171                            "cannot insert non-numeric into DOUBLE column".into(),
172                        ));
173                    }
174                }
175            }
176            Ok(Arc::new(builder.finish()))
177        }
178        DataType::Utf8 => {
179            let mut builder = StringBuilder::with_capacity(values.len(), values.len() * 8);
180            for value in values {
181                match value {
182                    PlanValue::Null => builder.append_null(),
183                    PlanValue::Integer(v) => builder.append_value(v.to_string()),
184                    PlanValue::Float(v) => builder.append_value(v.to_string()),
185                    PlanValue::String(s) => builder.append_value(s),
186                    PlanValue::Struct(_) => {
187                        return Err(Error::InvalidArgumentError(
188                            "cannot insert struct into STRING column".into(),
189                        ));
190                    }
191                }
192            }
193            Ok(Arc::new(builder.finish()))
194        }
195        DataType::Date32 => {
196            let mut builder = Date32Builder::with_capacity(values.len());
197            for value in values {
198                match value {
199                    PlanValue::Null => builder.append_null(),
200                    PlanValue::Integer(days) => {
201                        let casted = i32::try_from(*days).map_err(|_| {
202                            Error::InvalidArgumentError(
203                                "integer literal out of range for DATE column".into(),
204                            )
205                        })?;
206                        builder.append_value(casted);
207                    }
208                    PlanValue::Float(_) | PlanValue::Struct(_) => {
209                        return Err(Error::InvalidArgumentError(
210                            "cannot insert non-date value into DATE column".into(),
211                        ));
212                    }
213                    PlanValue::String(text) => {
214                        let days = parse_date32_literal(text)?;
215                        builder.append_value(days);
216                    }
217                }
218            }
219            Ok(Arc::new(builder.finish()))
220        }
221        DataType::Struct(fields) => {
222            use arrow::array::StructArray;
223            let mut field_arrays: Vec<(FieldRef, ArrayRef)> = Vec::with_capacity(fields.len());
224
225            for field in fields.iter() {
226                let field_name = field.name();
227                let field_type = field.data_type();
228                let mut field_values = Vec::with_capacity(values.len());
229
230                for value in values {
231                    match value {
232                        PlanValue::Null => field_values.push(PlanValue::Null),
233                        PlanValue::Struct(map) => {
234                            let field_value =
235                                map.get(field_name).cloned().unwrap_or(PlanValue::Null);
236                            field_values.push(field_value);
237                        }
238                        _ => {
239                            return Err(Error::InvalidArgumentError(format!(
240                                "expected struct value for struct column, got {:?}",
241                                value
242                            )));
243                        }
244                    }
245                }
246
247                let field_array = build_array_for_column(field_type, &field_values)?;
248                field_arrays.push((Arc::clone(field), field_array));
249            }
250
251            Ok(Arc::new(StructArray::from(field_arrays)))
252        }
253        other => Err(Error::InvalidArgumentError(format!(
254            "unsupported Arrow data type for INSERT: {other:?}"
255        ))),
256    }
257}