formualizer_eval/
function.rs

1//! formualizer-eval/src/function.rs
2// New home for the core `Function` trait and its capability flags.
3
4use core::panic;
5
6use crate::{args::ArgSchema, traits::ArgumentHandle};
7use formualizer_common::{ExcelError, LiteralValue};
8
9bitflags::bitflags! {
10    /// Describes the capabilities and properties of a function.
11    ///
12    /// This allows the engine to select optimal evaluation paths (e.g., vectorized,
13    /// parallel, GPU) and to enforce semantic contracts at compile time.
14    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
15    pub struct FnCaps: u16 {
16        // --- Semantics ---
17        /// The function always produces the same output for the same input and has no
18        /// side effects. This is the default for most functions.
19        const PURE          = 0b0000_0000_0001;
20        /// The function's output can change even with the same inputs (e.g., `RAND()`,
21        /// `NOW()`). Volatile functions are re-evaluated on every sheet change.
22        const VOLATILE      = 0b0000_0000_0010;
23
24        // --- Shape / Evaluation Strategy ---
25        /// The function reduces a range of inputs to a single value (e.g., `SUM`, `AVERAGE`).
26        /// Can be implemented with `eval_fold`.
27        const REDUCTION     = 0b0000_0000_0100;
28        /// The function operates on each element of its input ranges independently
29        /// (e.g., `SIN`, `ABS`). Can be implemented with `eval_map`.
30        const ELEMENTWISE   = 0b0000_0000_1000;
31        /// The function operates on a sliding window over its input (e.g., `MOVING_AVERAGE`).
32        /// Can be implemented with `eval_window`.
33        const WINDOWED      = 0b0000_0001_0000;
34        /// The function performs a lookup or search operation (e.g., `VLOOKUP`).
35        const LOOKUP        = 0b0000_0010_0000;
36
37        // --- Input Data Types ---
38        /// The function primarily operates on numbers. The engine can prepare
39        /// optimized numeric stripes (`&[f64]`) for it.
40        const NUMERIC_ONLY  = 0b0000_0100_0000;
41        /// The function primarily operates on booleans.
42        const BOOL_ONLY     = 0b0000_1000_0000;
43
44        // --- Backend Optimizations ---
45        /// The function has an implementation suitable for SIMD vectorization.
46        const SIMD_OK       = 0b0001_0000_0000;
47        /// The function can process input as a stream, without materializing the
48        /// entire range in memory.
49        const STREAM_OK     = 0b0010_0000_0000;
50        /// The function has a GPU-accelerated implementation.
51        const GPU_OK        = 0b0100_0000_0000;
52
53        // --- Reference semantics ---
54        /// The function can return a reference (to a cell/range/table) when
55        /// evaluated in a reference context. When used in a value context,
56        /// engines may materialize the reference to a `LiteralValue`.
57    const RETURNS_REFERENCE = 0b1000_0000_0000;
58
59    // --- Planning / Interpreter parallelism hints ---
60    /// The function enforces left-to-right evaluation and early-exit semantics.
61    /// The planner must not evaluate arguments in parallel nor reorder them.
62    const SHORT_CIRCUIT  = 0b0001_0000_0000_0000;
63    /// It is safe and potentially profitable to evaluate arguments in parallel.
64    /// The engine should still fold results in argument order for determinism.
65    const PARALLEL_ARGS  = 0b0010_0000_0000_0000;
66    /// It is safe to chunk and process input windows in parallel (e.g., SUMIFS).
67    const PARALLEL_CHUNKS= 0b0100_0000_0000_0000;
68    }
69}
70
71// --- Fast-Path Evaluation Contexts ---
72
73use crate::traits::FunctionContext;
74use bumpalo::Bump;
75
76/// A simple slice of homogeneous values for efficient iteration
77pub struct SliceStripe<'a> {
78    pub head: &'a [LiteralValue],
79}
80
81/// Context for `eval_fold` (Reduction operations).
82/// Provides efficient iteration over input ranges for fold/reduce operations.
83pub trait FnFoldCtx {
84    /// Visit numeric chunks packed from all range arguments; no materialization required.
85    fn for_each_numeric_chunk(
86        &mut self,
87        min_chunk: usize,
88        f: &mut dyn FnMut(crate::stripes::NumericChunk) -> Result<(), ExcelError>,
89    ) -> Result<(), ExcelError>;
90
91    /// Visit cells (coerced via range visitors) in row-major order.
92    fn for_each_cell(
93        &mut self,
94        f: &mut dyn FnMut(&LiteralValue) -> Result<(), ExcelError>,
95    ) -> Result<(), ExcelError>;
96
97    /// Return accumulated result (for two-pass folds like AVERAGE).
98    fn write_result(&mut self, v: LiteralValue);
99
100    /// Access original argument handles (for functions needing parameter scalars like k in LARGE/SMALL)
101    fn args(&self) -> &[ArgumentHandle<'_, '_>];
102}
103
104/// Concrete implementation of FnFoldCtx
105pub struct SimpleFoldCtx<'a, 'b> {
106    args: &'a [ArgumentHandle<'a, 'b>],
107    _ctx: &'a dyn FunctionContext,
108    result: Option<LiteralValue>,
109    /// Temporary arena for allocating iteration data
110    arena: Bump,
111}
112
113impl<'a, 'b> SimpleFoldCtx<'a, 'b> {
114    pub fn new(args: &'a [ArgumentHandle<'a, 'b>], ctx: &'a dyn FunctionContext) -> Self {
115        Self {
116            args,
117            _ctx: ctx,
118            result: None,
119            arena: Bump::new(),
120        }
121    }
122
123    pub fn take_result(self) -> Option<LiteralValue> {
124        self.result
125    }
126}
127
128impl<'a, 'b> FnFoldCtx for SimpleFoldCtx<'a, 'b> {
129    fn for_each_numeric_chunk(
130        &mut self,
131        min_chunk: usize,
132        f: &mut dyn FnMut(crate::stripes::NumericChunk) -> Result<(), ExcelError>,
133    ) -> Result<(), ExcelError> {
134        for arg in self.args {
135            match arg.range_view() {
136                Ok(view) => {
137                    view.numbers_chunked(
138                        crate::args::CoercionPolicy::NumberLenientText,
139                        min_chunk,
140                        f,
141                    )?;
142                }
143                Err(_e_rv) => {
144                    // Fall back to scalar value; propagate errors
145                    match arg.value() {
146                        Ok(value) => {
147                            let as_num = match value.as_ref() {
148                                LiteralValue::Error(e) => {
149                                    return Err(e.clone());
150                                }
151                                other => crate::coercion::to_number_lenient_with_locale(
152                                    other,
153                                    &self._ctx.locale(),
154                                )
155                                .ok(),
156                            };
157                            if let Some(n) = as_num {
158                                let one = [n];
159                                f(crate::stripes::NumericChunk {
160                                    data: &one,
161                                    validity: None,
162                                })?;
163                            }
164                        }
165                        Err(e_val) => {
166                            // Both range_view and value resolution failed → propagate error
167                            return Err(e_val);
168                        }
169                    }
170                }
171            }
172        }
173        Ok(())
174    }
175
176    fn for_each_cell(
177        &mut self,
178        f: &mut dyn FnMut(&LiteralValue) -> Result<(), ExcelError>,
179    ) -> Result<(), ExcelError> {
180        for arg in self.args {
181            if let Ok(view) = arg.range_view() {
182                view.for_each_cell(f)?;
183            } else if let Ok(value) = arg.value() {
184                f(value.as_ref())?;
185            }
186        }
187        Ok(())
188    }
189
190    fn write_result(&mut self, v: LiteralValue) {
191        self.result = Some(v);
192    }
193
194    fn args(&self) -> &[ArgumentHandle<'_, '_>] {
195        self.args
196    }
197}
198
199/// Context for `eval_map` (Element-wise operations).
200pub trait FnMapCtx {
201    /// Whether inputs indicate an array/range context. If false, callers should fall back to scalar.
202    fn is_array_context(&self) -> bool;
203
204    /// Apply a unary numeric mapping over the broadcasted input. The closure should return the mapped cell.
205    fn map_unary_numeric(
206        &mut self,
207        f: &mut dyn FnMut(f64) -> Result<LiteralValue, ExcelError>,
208    ) -> Result<(), ExcelError>;
209
210    /// Apply a binary numeric mapping over the broadcasted inputs (first two args).
211    fn map_binary_numeric(
212        &mut self,
213        f: &mut dyn FnMut(f64, f64) -> Result<LiteralValue, ExcelError>,
214    ) -> Result<(), ExcelError>;
215
216    /// Finalize and retrieve the output value (typically an Array). Implementations may move out internal buffers.
217    fn finalize(&mut self) -> LiteralValue;
218}
219
220// Windowed functions use the trait from window_ctx module.
221
222/// Revised, object-safe trait for all Excel-style functions.
223///
224/// This trait uses a capability-based model (`FnCaps`) to declare function
225/// properties, enabling the evaluation engine to select the most optimal
226/// execution path (e.g., scalar, vectorized, parallel).
227pub trait Function: Send + Sync + 'static {
228    /// Capability flags for this function
229    fn caps(&self) -> FnCaps {
230        FnCaps::PURE
231    }
232
233    fn name(&self) -> &'static str;
234    fn namespace(&self) -> &'static str {
235        ""
236    }
237    fn min_args(&self) -> usize {
238        0
239    }
240    fn variadic(&self) -> bool {
241        false
242    }
243    fn volatile(&self) -> bool {
244        self.caps().contains(FnCaps::VOLATILE)
245    }
246    fn arg_schema(&self) -> &'static [ArgSchema] {
247        if self.min_args() > 0 {
248            panic!("Non-zero min_args must have a valid arg_schema");
249        } else {
250            &[]
251        }
252    }
253
254    /// Optional list of additional alias names (case-insensitive) that should resolve to this
255    /// function. Default: empty slice. Implementors can override to expose legacy names.
256    /// Returned slice must have 'static lifetime (typically a static array reference).
257    fn aliases(&self) -> &'static [&'static str] {
258        &[]
259    }
260
261    #[inline]
262    fn function_salt(&self) -> u64 {
263        // Stable hash of function name + namespace
264        let full_name = if self.namespace().is_empty() {
265            self.name().to_string()
266        } else {
267            format!("{}::{}", self.namespace(), self.name())
268        };
269        crate::rng::fnv1a64(full_name.as_bytes())
270    }
271
272    /// The default, scalar evaluation path.
273    ///
274    /// This method is the fallback for all functions and the only required
275    /// evaluation path. It processes arguments one by one.
276    fn eval_scalar<'a, 'b>(
277        &self,
278        args: &'a [ArgumentHandle<'a, 'b>],
279        ctx: &dyn crate::traits::FunctionContext,
280    ) -> Result<LiteralValue, ExcelError>;
281
282    // --- Optional Fast Paths ---
283
284    /// An optional, optimized path for reduction functions (e.g., `SUM`, `COUNT`).
285    ///
286    /// This method is called by the engine if the `REDUCTION` capability is set.
287    /// It operates on a `FnFoldCtx` which provides efficient access to input data.
288    fn eval_fold(&self, _f: &mut dyn FnFoldCtx) -> Option<Result<LiteralValue, ExcelError>> {
289        None
290    }
291
292    /// An optional, optimized path for element-wise functions (e.g., `SIN`, `ABS`).
293    ///
294    /// This method is called by the engine if the `ELEMENTWISE` capability is set.
295    /// It operates on a `FnMapCtx` which provides direct access to input/output
296    /// data stripes for vectorized processing.
297    fn eval_map(&self, _m: &mut dyn FnMapCtx) -> Option<Result<LiteralValue, ExcelError>> {
298        None
299    }
300
301    /// An optional, optimized path for windowed functions (e.g., `MOVING_AVERAGE`).
302    ///
303    /// This method is called by the engine if the `WINDOWED` capability is set.
304    fn eval_window<'a, 'b>(
305        &self,
306        _w: &mut crate::window_ctx::SimpleWindowCtx<'a, 'b>,
307    ) -> Option<Result<LiteralValue, ExcelError>> {
308        None
309    }
310
311    /// Optional reference result path. Only called by the interpreter/engine
312    /// when the callsite expects a reference (e.g., range combinators, by-ref
313    /// argument positions, or spill sources).
314    ///
315    /// Default implementation returns `None`, indicating the function does not
316    /// support returning references. Functions that set `RETURNS_REFERENCE`
317    /// should override this.
318    fn eval_reference<'a, 'b>(
319        &self,
320        _args: &'a [ArgumentHandle<'a, 'b>],
321        _ctx: &dyn crate::traits::FunctionContext,
322    ) -> Option<Result<formualizer_parse::parser::ReferenceType, ExcelError>> {
323        None
324    }
325
326    /// Dispatch to the most optimal evaluation path based on capabilities.
327    /// This default implementation checks caps and calls the appropriate eval method.
328    fn dispatch<'a, 'b>(
329        &self,
330        args: &'a [crate::traits::ArgumentHandle<'a, 'b>],
331        ctx: &dyn crate::traits::FunctionContext,
332    ) -> Result<LiteralValue, ExcelError> {
333        let caps = self.caps();
334
335        // Central argument validation (always on)
336        {
337            use crate::args::{ValidationOptions, validate_and_prepare};
338            let schema = self.arg_schema();
339            // Strict validation; convert errors to value errors to preserve interpreter Ok path
340            if let Err(e) =
341                validate_and_prepare(args, schema, ValidationOptions { warn_only: false })
342            {
343                return Ok(LiteralValue::Error(e));
344            }
345        }
346
347        // Try fast paths based on capabilities
348        // Commented out for now until we get `eval_scalar robustly working in real-world tests`
349        // if caps.contains(FnCaps::REDUCTION) {
350        //     // Create fold context and try eval_fold
351        //     let mut fold_ctx = SimpleFoldCtx::new(args, ctx);
352        //     if let Some(result) = self.eval_fold(&mut fold_ctx) {
353        //         return result;
354        //     }
355        // }
356
357        if caps.contains(FnCaps::ELEMENTWISE) {
358            // Minimal unary elementwise path: construct a simple map ctx and call eval_map
359            let mut m = crate::map_ctx::SimpleMapCtx::new(args, ctx);
360            if FnMapCtx::is_array_context(&m) {
361                let dyn_m: &mut dyn FnMapCtx = &mut m;
362                if let Some(result) = self.eval_map(dyn_m) {
363                    return result;
364                }
365            }
366        }
367
368        if caps.contains(FnCaps::WINDOWED) {
369            // Construct a minimal window context with a default spec; functions can downcast.
370            let mut w = crate::window_ctx::SimpleWindowCtx::new(
371                args,
372                ctx,
373                crate::window_ctx::WindowSpec::default(),
374            );
375            if let Some(result) = self.eval_window(&mut w) {
376                return result;
377            }
378        }
379
380        // Fallback to scalar evaluation
381        self.eval_scalar(args, ctx)
382    }
383}