1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
//! formualizer-eval/src/function.rs
// New home for the core `Function` trait and its capability flags.
use core::panic;
use crate::{
args::ArgSchema, function_contract::FunctionDependencyContract, traits::ArgumentHandle,
};
use formualizer_common::{ExcelError, LiteralValue};
bitflags::bitflags! {
/// Describes the capabilities and properties of a function.
///
/// This allows the engine to select optimal evaluation paths (e.g., vectorized,
/// parallel, GPU) and to enforce semantic contracts at compile time.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct FnCaps: u16 {
// --- Semantics ---
/// The function always produces the same output for the same input and has no
/// side effects. This is the default for most functions.
const PURE = 0b0000_0000_0001;
/// The function's output can change even with the same inputs (e.g., `RAND()`,
/// `NOW()`). Volatile functions are re-evaluated on every sheet change.
const VOLATILE = 0b0000_0000_0010;
// --- Shape / Evaluation Strategy ---
/// The function reduces a range of inputs to a single value (e.g., `SUM`, `AVERAGE`).
const REDUCTION = 0b0000_0000_0100;
/// The function operates on each element of its input ranges independently
/// (e.g., `SIN`, `ABS`).
const ELEMENTWISE = 0b0000_0000_1000;
/// The function operates on a sliding window over its input (e.g., `MOVING_AVERAGE`).
const WINDOWED = 0b0000_0001_0000;
/// The function performs a lookup or search operation (e.g., `VLOOKUP`).
const LOOKUP = 0b0000_0010_0000;
// --- Input Data Types ---
/// The function primarily operates on numbers. The engine can prepare
/// optimized numeric stripes (`&[f64]`) for it.
const NUMERIC_ONLY = 0b0000_0100_0000;
/// The function primarily operates on booleans.
const BOOL_ONLY = 0b0000_1000_0000;
// --- Backend Optimizations ---
/// The function has an implementation suitable for SIMD vectorization.
const SIMD_OK = 0b0001_0000_0000;
/// The function can process input as a stream, without materializing the
/// entire range in memory.
const STREAM_OK = 0b0010_0000_0000;
/// The function has a GPU-accelerated implementation.
const GPU_OK = 0b0100_0000_0000;
// --- Reference semantics ---
/// The function can return a reference (to a cell/range/table) when
/// evaluated in a reference context. When used in a value context,
/// engines may materialize the reference to a `LiteralValue`.
const RETURNS_REFERENCE = 0b1000_0000_0000;
// --- Planning / Interpreter parallelism hints ---
/// The function enforces left-to-right evaluation and early-exit semantics.
/// The planner must not evaluate arguments in parallel nor reorder them.
const SHORT_CIRCUIT = 0b0001_0000_0000_0000;
/// It is safe and potentially profitable to evaluate arguments in parallel.
/// The engine should still fold results in argument order for determinism.
const PARALLEL_ARGS = 0b0010_0000_0000_0000;
/// It is safe to chunk and process input windows in parallel (e.g., SUMIFS).
/// It is safe to chunk and process input windows in parallel (e.g., SUMIFS).
const PARALLEL_CHUNKS= 0b0100_0000_0000_0000;
/// Function has dynamic dependencies determined at runtime (e.g. INDIRECT, OFFSET)
const DYNAMIC_DEPENDENCY = 0b1000_0000_0000_0000;
}
}
/// Revised, object-safe trait for all Excel-style functions.
///
/// This trait uses a capability-based model (`FnCaps`) to declare function
/// properties, enabling the evaluation engine to select the most optimal
/// execution path (e.g., scalar, vectorized, parallel).
pub trait Function: Send + Sync + 'static {
/// Capability flags for this function
fn caps(&self) -> FnCaps {
FnCaps::PURE
}
fn name(&self) -> &'static str;
fn namespace(&self) -> &'static str {
""
}
fn min_args(&self) -> usize {
0
}
fn variadic(&self) -> bool {
false
}
fn volatile(&self) -> bool {
self.caps().contains(FnCaps::VOLATILE)
}
fn arg_schema(&self) -> &'static [ArgSchema] {
if self.min_args() > 0 {
panic!("Non-zero min_args must have a valid arg_schema");
} else {
&[]
}
}
/// Optional list of additional alias names (case-insensitive) that should resolve to this
/// function. Default: empty slice. Implementors can override to expose legacy names.
/// Returned slice must have 'static lifetime (typically a static array reference).
fn aliases(&self) -> &'static [&'static str] {
&[]
}
/// Optional dependency contract for passive planning/FormulaPlane analysis.
///
/// The default is deliberately conservative: functions that do not opt in
/// must not receive dependency-summary optimization. Implementations should
/// return `Some` only for arities and argument roles they can describe
/// without under-approximating dependencies.
fn dependency_contract(&self, _arity: usize) -> Option<FunctionDependencyContract> {
None
}
#[inline]
fn function_salt(&self) -> u64 {
// Stable hash of function name + namespace
let full_name = if self.namespace().is_empty() {
self.name().to_string()
} else {
format!("{}::{}", self.namespace(), self.name())
};
crate::rng::fnv1a64(full_name.as_bytes())
}
/// The unified evaluation path.
///
/// This method replaces the separate scalar, fold, and map paths.
/// Functions use the provided `ArgumentHandle`s to access inputs as either
/// scalars or `RangeView`s (Arrow-backed virtual ranges).
fn eval<'a, 'b, 'c>(
&self,
args: &'c [ArgumentHandle<'a, 'b>],
ctx: &dyn crate::traits::FunctionContext<'b>,
) -> Result<crate::traits::CalcValue<'b>, ExcelError>;
/// Optional reference result path. Only called by the interpreter/engine
/// when the callsite expects a reference (e.g., range combinators, by-ref
/// argument positions, or spill sources).
///
/// Default implementation returns `None`, indicating the function does not
/// support returning references. Functions that set `RETURNS_REFERENCE`
/// should override this.
fn eval_reference<'a, 'b, 'c>(
&self,
_args: &'c [ArgumentHandle<'a, 'b>],
_ctx: &dyn crate::traits::FunctionContext<'b>,
) -> Option<Result<formualizer_parse::parser::ReferenceType, ExcelError>> {
None
}
/// Dispatch to the unified evaluation path with automatic argument validation.
fn dispatch<'a, 'b, 'c>(
&self,
args: &'c [crate::traits::ArgumentHandle<'a, 'b>],
ctx: &dyn crate::traits::FunctionContext<'b>,
) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
// Short-circuit functions (IF/IFS/CHOOSE/SWITCH/AND/OR, ...) evaluate
// their arguments lazily inside `eval`; eagerly materializing every
// argument here would execute reads in untaken branches (defeating the
// documented short-circuit semantics) and double-evaluate taken ones.
// Their schemas are Any-kind with no per-arg coercion, so per-argument
// validation cannot fail; only the min-arity check is meaningful.
// (LET/LAMBDA already bypass validation via `dispatch` overrides for
// the same reason.)
if self.caps().contains(FnCaps::SHORT_CIRCUIT) {
if args.len() < self.min_args() {
return Ok(crate::traits::CalcValue::Scalar(LiteralValue::Error(
ExcelError::new(formualizer_common::ExcelErrorKind::Value).with_message(
format!(
"Too few arguments: expected at least {}, got {}",
self.min_args(),
args.len()
),
),
)));
}
return self.eval(args, ctx);
}
// Central argument validation (includes min-arity check)
{
use crate::args::{ValidationOptions, validate_and_prepare};
let schema = self.arg_schema();
if let Err(e) = validate_and_prepare(
args,
schema,
ValidationOptions {
warn_only: false,
min_args: self.min_args(),
},
) {
return Ok(crate::traits::CalcValue::Scalar(LiteralValue::Error(e)));
}
}
self.eval(args, ctx)
}
}