formualizer_eval/function.rs
1//! formualizer-eval/src/function.rs
2// New home for the core `Function` trait and its capability flags.
3
4use core::panic;
5
6use crate::{
7 args::ArgSchema, function_contract::FunctionDependencyContract, traits::ArgumentHandle,
8};
9use formualizer_common::{ExcelError, LiteralValue};
10
11bitflags::bitflags! {
12 /// Describes the capabilities and properties of a function.
13 ///
14 /// This allows the engine to select optimal evaluation paths (e.g., vectorized,
15 /// parallel, GPU) and to enforce semantic contracts at compile time.
16 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
17 pub struct FnCaps: u16 {
18 // --- Semantics ---
19 /// The function always produces the same output for the same input and has no
20 /// side effects. This is the default for most functions.
21 const PURE = 0b0000_0000_0001;
22 /// The function's output can change even with the same inputs (e.g., `RAND()`,
23 /// `NOW()`). Volatile functions are re-evaluated on every sheet change.
24 const VOLATILE = 0b0000_0000_0010;
25
26 // --- Shape / Evaluation Strategy ---
27 /// The function reduces a range of inputs to a single value (e.g., `SUM`, `AVERAGE`).
28 const REDUCTION = 0b0000_0000_0100;
29 /// The function operates on each element of its input ranges independently
30 /// (e.g., `SIN`, `ABS`).
31 const ELEMENTWISE = 0b0000_0000_1000;
32 /// The function operates on a sliding window over its input (e.g., `MOVING_AVERAGE`).
33 const WINDOWED = 0b0000_0001_0000;
34 /// The function performs a lookup or search operation (e.g., `VLOOKUP`).
35 const LOOKUP = 0b0000_0010_0000;
36
37 // --- Input Data Types ---
38 /// The function primarily operates on numbers. The engine can prepare
39 /// optimized numeric stripes (`&[f64]`) for it.
40 const NUMERIC_ONLY = 0b0000_0100_0000;
41 /// The function primarily operates on booleans.
42 const BOOL_ONLY = 0b0000_1000_0000;
43
44 // --- Backend Optimizations ---
45 /// The function has an implementation suitable for SIMD vectorization.
46 const SIMD_OK = 0b0001_0000_0000;
47 /// The function can process input as a stream, without materializing the
48 /// entire range in memory.
49 const STREAM_OK = 0b0010_0000_0000;
50 /// The function has a GPU-accelerated implementation.
51 const GPU_OK = 0b0100_0000_0000;
52
53 // --- Reference semantics ---
54 /// The function can return a reference (to a cell/range/table) when
55 /// evaluated in a reference context. When used in a value context,
56 /// engines may materialize the reference to a `LiteralValue`.
57 const RETURNS_REFERENCE = 0b1000_0000_0000;
58
59 // --- Planning / Interpreter parallelism hints ---
60 /// The function enforces left-to-right evaluation and early-exit semantics.
61 /// The planner must not evaluate arguments in parallel nor reorder them.
62 const SHORT_CIRCUIT = 0b0001_0000_0000_0000;
63 /// It is safe and potentially profitable to evaluate arguments in parallel.
64 /// The engine should still fold results in argument order for determinism.
65 const PARALLEL_ARGS = 0b0010_0000_0000_0000;
66 /// It is safe to chunk and process input windows in parallel (e.g., SUMIFS).
67 /// It is safe to chunk and process input windows in parallel (e.g., SUMIFS).
68 const PARALLEL_CHUNKS= 0b0100_0000_0000_0000;
69 /// Function has dynamic dependencies determined at runtime (e.g. INDIRECT, OFFSET)
70 const DYNAMIC_DEPENDENCY = 0b1000_0000_0000_0000;
71 }
72}
73
74/// Revised, object-safe trait for all Excel-style functions.
75///
76/// This trait uses a capability-based model (`FnCaps`) to declare function
77/// properties, enabling the evaluation engine to select the most optimal
78/// execution path (e.g., scalar, vectorized, parallel).
79pub trait Function: Send + Sync + 'static {
80 /// Capability flags for this function
81 fn caps(&self) -> FnCaps {
82 FnCaps::PURE
83 }
84
85 fn name(&self) -> &'static str;
86 fn namespace(&self) -> &'static str {
87 ""
88 }
89 fn min_args(&self) -> usize {
90 0
91 }
92 fn variadic(&self) -> bool {
93 false
94 }
95 fn volatile(&self) -> bool {
96 self.caps().contains(FnCaps::VOLATILE)
97 }
98 fn arg_schema(&self) -> &'static [ArgSchema] {
99 if self.min_args() > 0 {
100 panic!("Non-zero min_args must have a valid arg_schema");
101 } else {
102 &[]
103 }
104 }
105
106 /// Optional list of additional alias names (case-insensitive) that should resolve to this
107 /// function. Default: empty slice. Implementors can override to expose legacy names.
108 /// Returned slice must have 'static lifetime (typically a static array reference).
109 fn aliases(&self) -> &'static [&'static str] {
110 &[]
111 }
112
113 /// Optional dependency contract for passive planning/FormulaPlane analysis.
114 ///
115 /// The default is deliberately conservative: functions that do not opt in
116 /// must not receive dependency-summary optimization. Implementations should
117 /// return `Some` only for arities and argument roles they can describe
118 /// without under-approximating dependencies.
119 fn dependency_contract(&self, _arity: usize) -> Option<FunctionDependencyContract> {
120 None
121 }
122
123 #[inline]
124 fn function_salt(&self) -> u64 {
125 // Stable hash of function name + namespace
126 let full_name = if self.namespace().is_empty() {
127 self.name().to_string()
128 } else {
129 format!("{}::{}", self.namespace(), self.name())
130 };
131 crate::rng::fnv1a64(full_name.as_bytes())
132 }
133
134 /// The unified evaluation path.
135 ///
136 /// This method replaces the separate scalar, fold, and map paths.
137 /// Functions use the provided `ArgumentHandle`s to access inputs as either
138 /// scalars or `RangeView`s (Arrow-backed virtual ranges).
139 fn eval<'a, 'b, 'c>(
140 &self,
141 args: &'c [ArgumentHandle<'a, 'b>],
142 ctx: &dyn crate::traits::FunctionContext<'b>,
143 ) -> Result<crate::traits::CalcValue<'b>, ExcelError>;
144
145 /// Optional reference result path. Only called by the interpreter/engine
146 /// when the callsite expects a reference (e.g., range combinators, by-ref
147 /// argument positions, or spill sources).
148 ///
149 /// Default implementation returns `None`, indicating the function does not
150 /// support returning references. Functions that set `RETURNS_REFERENCE`
151 /// should override this.
152 fn eval_reference<'a, 'b, 'c>(
153 &self,
154 _args: &'c [ArgumentHandle<'a, 'b>],
155 _ctx: &dyn crate::traits::FunctionContext<'b>,
156 ) -> Option<Result<formualizer_parse::parser::ReferenceType, ExcelError>> {
157 None
158 }
159
160 /// Dispatch to the unified evaluation path with automatic argument validation.
161 fn dispatch<'a, 'b, 'c>(
162 &self,
163 args: &'c [crate::traits::ArgumentHandle<'a, 'b>],
164 ctx: &dyn crate::traits::FunctionContext<'b>,
165 ) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
166 // Short-circuit functions (IF/IFS/CHOOSE/SWITCH/AND/OR, ...) evaluate
167 // their arguments lazily inside `eval`; eagerly materializing every
168 // argument here would execute reads in untaken branches (defeating the
169 // documented short-circuit semantics) and double-evaluate taken ones.
170 // Their schemas are Any-kind with no per-arg coercion, so per-argument
171 // validation cannot fail; only the min-arity check is meaningful.
172 // (LET/LAMBDA already bypass validation via `dispatch` overrides for
173 // the same reason.)
174 if self.caps().contains(FnCaps::SHORT_CIRCUIT) {
175 if args.len() < self.min_args() {
176 return Ok(crate::traits::CalcValue::Scalar(LiteralValue::Error(
177 ExcelError::new(formualizer_common::ExcelErrorKind::Value).with_message(
178 format!(
179 "Too few arguments: expected at least {}, got {}",
180 self.min_args(),
181 args.len()
182 ),
183 ),
184 )));
185 }
186 return self.eval(args, ctx);
187 }
188
189 // Central argument validation (includes min-arity check)
190 {
191 use crate::args::{ValidationOptions, validate_and_prepare};
192 let schema = self.arg_schema();
193 if let Err(e) = validate_and_prepare(
194 args,
195 schema,
196 ValidationOptions {
197 warn_only: false,
198 min_args: self.min_args(),
199 },
200 ) {
201 return Ok(crate::traits::CalcValue::Scalar(LiteralValue::Error(e)));
202 }
203 }
204
205 self.eval(args, ctx)
206 }
207}