formualizer_eval/function.rs
1//! formualizer-eval/src/function.rs
2// New home for the core `Function` trait and its capability flags.
3
4use core::panic;
5
6use crate::{args::ArgSchema, traits::ArgumentHandle};
7use formualizer_common::{ExcelError, LiteralValue};
8
9bitflags::bitflags! {
10 /// Describes the capabilities and properties of a function.
11 ///
12 /// This allows the engine to select optimal evaluation paths (e.g., vectorized,
13 /// parallel, GPU) and to enforce semantic contracts at compile time.
14 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
15 pub struct FnCaps: u16 {
16 // --- Semantics ---
17 /// The function always produces the same output for the same input and has no
18 /// side effects. This is the default for most functions.
19 const PURE = 0b0000_0000_0001;
20 /// The function's output can change even with the same inputs (e.g., `RAND()`,
21 /// `NOW()`). Volatile functions are re-evaluated on every sheet change.
22 const VOLATILE = 0b0000_0000_0010;
23
24 // --- Shape / Evaluation Strategy ---
25 /// The function reduces a range of inputs to a single value (e.g., `SUM`, `AVERAGE`).
26 /// Can be implemented with `eval_fold`.
27 const REDUCTION = 0b0000_0000_0100;
28 /// The function operates on each element of its input ranges independently
29 /// (e.g., `SIN`, `ABS`). Can be implemented with `eval_map`.
30 const ELEMENTWISE = 0b0000_0000_1000;
31 /// The function operates on a sliding window over its input (e.g., `MOVING_AVERAGE`).
32 /// Can be implemented with `eval_window`.
33 const WINDOWED = 0b0000_0001_0000;
34 /// The function performs a lookup or search operation (e.g., `VLOOKUP`).
35 const LOOKUP = 0b0000_0010_0000;
36
37 // --- Input Data Types ---
38 /// The function primarily operates on numbers. The engine can prepare
39 /// optimized numeric stripes (`&[f64]`) for it.
40 const NUMERIC_ONLY = 0b0000_0100_0000;
41 /// The function primarily operates on booleans.
42 const BOOL_ONLY = 0b0000_1000_0000;
43
44 // --- Backend Optimizations ---
45 /// The function has an implementation suitable for SIMD vectorization.
46 const SIMD_OK = 0b0001_0000_0000;
47 /// The function can process input as a stream, without materializing the
48 /// entire range in memory.
49 const STREAM_OK = 0b0010_0000_0000;
50 /// The function has a GPU-accelerated implementation.
51 const GPU_OK = 0b0100_0000_0000;
52
53 // --- Reference semantics ---
54 /// The function can return a reference (to a cell/range/table) when
55 /// evaluated in a reference context. When used in a value context,
56 /// engines may materialize the reference to a `LiteralValue`.
57 const RETURNS_REFERENCE = 0b1000_0000_0000;
58
59 // --- Planning / Interpreter parallelism hints ---
60 /// The function enforces left-to-right evaluation and early-exit semantics.
61 /// The planner must not evaluate arguments in parallel nor reorder them.
62 const SHORT_CIRCUIT = 0b0001_0000_0000_0000;
63 /// It is safe and potentially profitable to evaluate arguments in parallel.
64 /// The engine should still fold results in argument order for determinism.
65 const PARALLEL_ARGS = 0b0010_0000_0000_0000;
66 /// It is safe to chunk and process input windows in parallel (e.g., SUMIFS).
67 const PARALLEL_CHUNKS= 0b0100_0000_0000_0000;
68 }
69}
70
71// --- Fast-Path Evaluation Contexts ---
72
73use crate::traits::FunctionContext;
74use bumpalo::Bump;
75
76/// A simple slice of homogeneous values for efficient iteration
77pub struct SliceStripe<'a> {
78 pub head: &'a [LiteralValue],
79}
80
81/// Context for `eval_fold` (Reduction operations).
82/// Provides efficient iteration over input ranges for fold/reduce operations.
83pub trait FnFoldCtx {
84 /// Visit numeric chunks packed from all range arguments; no materialization required.
85 fn for_each_numeric_chunk(
86 &mut self,
87 min_chunk: usize,
88 f: &mut dyn FnMut(crate::stripes::NumericChunk) -> Result<(), ExcelError>,
89 ) -> Result<(), ExcelError>;
90
91 /// Visit cells (coerced via range visitors) in row-major order.
92 fn for_each_cell(
93 &mut self,
94 f: &mut dyn FnMut(&LiteralValue) -> Result<(), ExcelError>,
95 ) -> Result<(), ExcelError>;
96
97 /// Return accumulated result (for two-pass folds like AVERAGE).
98 fn write_result(&mut self, v: LiteralValue);
99
100 /// Access original argument handles (for functions needing parameter scalars like k in LARGE/SMALL)
101 fn args(&self) -> &[ArgumentHandle<'_, '_>];
102}
103
104/// Concrete implementation of FnFoldCtx
105pub struct SimpleFoldCtx<'a, 'b> {
106 args: &'a [ArgumentHandle<'a, 'b>],
107 _ctx: &'a dyn FunctionContext,
108 result: Option<LiteralValue>,
109 /// Temporary arena for allocating iteration data
110 arena: Bump,
111}
112
113impl<'a, 'b> SimpleFoldCtx<'a, 'b> {
114 pub fn new(args: &'a [ArgumentHandle<'a, 'b>], ctx: &'a dyn FunctionContext) -> Self {
115 Self {
116 args,
117 _ctx: ctx,
118 result: None,
119 arena: Bump::new(),
120 }
121 }
122
123 pub fn take_result(self) -> Option<LiteralValue> {
124 self.result
125 }
126}
127
128impl<'a, 'b> FnFoldCtx for SimpleFoldCtx<'a, 'b> {
129 fn for_each_numeric_chunk(
130 &mut self,
131 min_chunk: usize,
132 f: &mut dyn FnMut(crate::stripes::NumericChunk) -> Result<(), ExcelError>,
133 ) -> Result<(), ExcelError> {
134 for arg in self.args {
135 match arg.range_view() {
136 Ok(view) => {
137 view.numbers_chunked(
138 crate::args::CoercionPolicy::NumberLenientText,
139 min_chunk,
140 f,
141 )?;
142 }
143 Err(_e_rv) => {
144 // Fall back to scalar value; propagate errors
145 match arg.value() {
146 Ok(value) => {
147 let as_num = match value.as_ref() {
148 LiteralValue::Error(e) => {
149 return Err(e.clone());
150 }
151 other => crate::coercion::to_number_lenient_with_locale(
152 other,
153 &self._ctx.locale(),
154 )
155 .ok(),
156 };
157 if let Some(n) = as_num {
158 let one = [n];
159 f(crate::stripes::NumericChunk {
160 data: &one,
161 validity: None,
162 })?;
163 }
164 }
165 Err(e_val) => {
166 // Both range_view and value resolution failed → propagate error
167 return Err(e_val);
168 }
169 }
170 }
171 }
172 }
173 Ok(())
174 }
175
176 fn for_each_cell(
177 &mut self,
178 f: &mut dyn FnMut(&LiteralValue) -> Result<(), ExcelError>,
179 ) -> Result<(), ExcelError> {
180 for arg in self.args {
181 if let Ok(view) = arg.range_view() {
182 view.for_each_cell(f)?;
183 } else if let Ok(value) = arg.value() {
184 f(value.as_ref())?;
185 }
186 }
187 Ok(())
188 }
189
190 fn write_result(&mut self, v: LiteralValue) {
191 self.result = Some(v);
192 }
193
194 fn args(&self) -> &[ArgumentHandle<'_, '_>] {
195 self.args
196 }
197}
198
199/// Context for `eval_map` (Element-wise operations).
200pub trait FnMapCtx {
201 /// Whether inputs indicate an array/range context. If false, callers should fall back to scalar.
202 fn is_array_context(&self) -> bool;
203
204 /// Apply a unary numeric mapping over the broadcasted input. The closure should return the mapped cell.
205 fn map_unary_numeric(
206 &mut self,
207 f: &mut dyn FnMut(f64) -> Result<LiteralValue, ExcelError>,
208 ) -> Result<(), ExcelError>;
209
210 /// Apply a binary numeric mapping over the broadcasted inputs (first two args).
211 fn map_binary_numeric(
212 &mut self,
213 f: &mut dyn FnMut(f64, f64) -> Result<LiteralValue, ExcelError>,
214 ) -> Result<(), ExcelError>;
215
216 /// Finalize and retrieve the output value (typically an Array). Implementations may move out internal buffers.
217 fn finalize(&mut self) -> LiteralValue;
218}
219
220// Windowed functions use the trait from window_ctx module.
221
222/// Revised, object-safe trait for all Excel-style functions.
223///
224/// This trait uses a capability-based model (`FnCaps`) to declare function
225/// properties, enabling the evaluation engine to select the most optimal
226/// execution path (e.g., scalar, vectorized, parallel).
227pub trait Function: Send + Sync + 'static {
228 /// Capability flags for this function
229 fn caps(&self) -> FnCaps {
230 FnCaps::PURE
231 }
232
233 fn name(&self) -> &'static str;
234 fn namespace(&self) -> &'static str {
235 ""
236 }
237 fn min_args(&self) -> usize {
238 0
239 }
240 fn variadic(&self) -> bool {
241 false
242 }
243 fn volatile(&self) -> bool {
244 self.caps().contains(FnCaps::VOLATILE)
245 }
246 fn arg_schema(&self) -> &'static [ArgSchema] {
247 if self.min_args() > 0 {
248 panic!("Non-zero min_args must have a valid arg_schema");
249 } else {
250 &[]
251 }
252 }
253
254 /// Optional list of additional alias names (case-insensitive) that should resolve to this
255 /// function. Default: empty slice. Implementors can override to expose legacy names.
256 /// Returned slice must have 'static lifetime (typically a static array reference).
257 fn aliases(&self) -> &'static [&'static str] {
258 &[]
259 }
260
261 #[inline]
262 fn function_salt(&self) -> u64 {
263 // Stable hash of function name + namespace
264 let full_name = if self.namespace().is_empty() {
265 self.name().to_string()
266 } else {
267 format!("{}::{}", self.namespace(), self.name())
268 };
269 crate::rng::fnv1a64(full_name.as_bytes())
270 }
271
272 /// The default, scalar evaluation path.
273 ///
274 /// This method is the fallback for all functions and the only required
275 /// evaluation path. It processes arguments one by one.
276 fn eval_scalar<'a, 'b>(
277 &self,
278 args: &'a [ArgumentHandle<'a, 'b>],
279 ctx: &dyn crate::traits::FunctionContext,
280 ) -> Result<LiteralValue, ExcelError>;
281
282 // --- Optional Fast Paths ---
283
284 /// An optional, optimized path for reduction functions (e.g., `SUM`, `COUNT`).
285 ///
286 /// This method is called by the engine if the `REDUCTION` capability is set.
287 /// It operates on a `FnFoldCtx` which provides efficient access to input data.
288 fn eval_fold(&self, _f: &mut dyn FnFoldCtx) -> Option<Result<LiteralValue, ExcelError>> {
289 None
290 }
291
292 /// An optional, optimized path for element-wise functions (e.g., `SIN`, `ABS`).
293 ///
294 /// This method is called by the engine if the `ELEMENTWISE` capability is set.
295 /// It operates on a `FnMapCtx` which provides direct access to input/output
296 /// data stripes for vectorized processing.
297 fn eval_map(&self, _m: &mut dyn FnMapCtx) -> Option<Result<LiteralValue, ExcelError>> {
298 None
299 }
300
301 /// An optional, optimized path for windowed functions (e.g., `MOVING_AVERAGE`).
302 ///
303 /// This method is called by the engine if the `WINDOWED` capability is set.
304 fn eval_window<'a, 'b>(
305 &self,
306 _w: &mut crate::window_ctx::SimpleWindowCtx<'a, 'b>,
307 ) -> Option<Result<LiteralValue, ExcelError>> {
308 None
309 }
310
311 /// Optional reference result path. Only called by the interpreter/engine
312 /// when the callsite expects a reference (e.g., range combinators, by-ref
313 /// argument positions, or spill sources).
314 ///
315 /// Default implementation returns `None`, indicating the function does not
316 /// support returning references. Functions that set `RETURNS_REFERENCE`
317 /// should override this.
318 fn eval_reference<'a, 'b>(
319 &self,
320 _args: &'a [ArgumentHandle<'a, 'b>],
321 _ctx: &dyn crate::traits::FunctionContext,
322 ) -> Option<Result<formualizer_parse::parser::ReferenceType, ExcelError>> {
323 None
324 }
325
326 /// Dispatch to the most optimal evaluation path based on capabilities.
327 /// This default implementation checks caps and calls the appropriate eval method.
328 fn dispatch<'a, 'b>(
329 &self,
330 args: &'a [crate::traits::ArgumentHandle<'a, 'b>],
331 ctx: &dyn crate::traits::FunctionContext,
332 ) -> Result<LiteralValue, ExcelError> {
333 let caps = self.caps();
334
335 // Central argument validation (always on)
336 {
337 use crate::args::{ValidationOptions, validate_and_prepare};
338 let schema = self.arg_schema();
339 // Strict validation; convert errors to value errors to preserve interpreter Ok path
340 if let Err(e) =
341 validate_and_prepare(args, schema, ValidationOptions { warn_only: false })
342 {
343 return Ok(LiteralValue::Error(e));
344 }
345 }
346
347 // Try fast paths based on capabilities
348 // Commented out for now until we get `eval_scalar robustly working in real-world tests`
349 // if caps.contains(FnCaps::REDUCTION) {
350 // // Create fold context and try eval_fold
351 // let mut fold_ctx = SimpleFoldCtx::new(args, ctx);
352 // if let Some(result) = self.eval_fold(&mut fold_ctx) {
353 // return result;
354 // }
355 // }
356
357 if caps.contains(FnCaps::ELEMENTWISE) {
358 // Minimal unary elementwise path: construct a simple map ctx and call eval_map
359 let mut m = crate::map_ctx::SimpleMapCtx::new(args, ctx);
360 if FnMapCtx::is_array_context(&m) {
361 let dyn_m: &mut dyn FnMapCtx = &mut m;
362 if let Some(result) = self.eval_map(dyn_m) {
363 return result;
364 }
365 }
366 }
367
368 if caps.contains(FnCaps::WINDOWED) {
369 // Construct a minimal window context with a default spec; functions can downcast.
370 let mut w = crate::window_ctx::SimpleWindowCtx::new(
371 args,
372 ctx,
373 crate::window_ctx::WindowSpec::default(),
374 );
375 if let Some(result) = self.eval_window(&mut w) {
376 return result;
377 }
378 }
379
380 // Fallback to scalar evaluation
381 self.eval_scalar(args, ctx)
382 }
383}