Skip to main content

formualizer_eval/builtins/text/
byte.rs

1//! Byte-oriented text functions for non-DBCS locales.
2//!
3//! Excel's `*B` functions count bytes in double-byte character set locales. Formualizer's locale
4//! layer is currently invariant/non-DBCS, so these functions delegate to their character-counting
5//! counterparts.
6
7use super::{FindFn, LeftFn, LenFn, MidFn, ReplaceFn, RightFn, SearchFn};
8use crate::args::ArgSchema;
9use crate::builtins::utils::ARG_ANY_ONE;
10use crate::function::Function;
11use crate::traits::{ArgumentHandle, FunctionContext};
12use formualizer_common::ExcelError;
13use formualizer_macros::func_caps;
14
15/// Finds text within text using non-DBCS byte-compatible semantics.
16///
17/// In Formualizer's invariant locale, FINDB delegates to FIND and counts Unicode
18/// scalar characters rather than double-byte locale bytes.
19///
20/// ```yaml,sandbox
21/// title: "Find byte-compatible text"
22/// formula: '=FINDB("CD","abcDEFCD")'
23/// expected: 7
24/// ```
25///
26/// ```yaml,docs
27/// related:
28///   - FIND
29///   - SEARCHB
30///   - LENB
31/// faq:
32///   - q: "Does FINDB use DBCS byte counts?"
33///     a: "Not currently. In the invariant locale it delegates to FIND."
34/// ```
35#[derive(Debug)]
36pub struct FindBFn;
37/// [formualizer-docgen:schema:start]
38/// Name: FINDB
39/// Type: FindBFn
40/// Min args: 2
41/// Max args: variadic
42/// Variadic: true
43/// Signature: FINDB(arg1...: any@scalar)
44/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
45/// Caps: PURE
46/// [formualizer-docgen:schema:end]
47impl Function for FindBFn {
48    func_caps!(PURE);
49    fn name(&self) -> &'static str {
50        "FINDB"
51    }
52    fn min_args(&self) -> usize {
53        2
54    }
55    fn variadic(&self) -> bool {
56        true
57    }
58    fn arg_schema(&self) -> &'static [ArgSchema] {
59        &ARG_ANY_ONE[..]
60    }
61    fn eval<'a, 'b, 'c>(
62        &self,
63        args: &'c [ArgumentHandle<'a, 'b>],
64        ctx: &dyn FunctionContext<'b>,
65    ) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
66        FindFn.eval(args, ctx)
67    }
68}
69
70/// Returns the leftmost characters using non-DBCS byte-compatible semantics.
71///
72/// In Formualizer's invariant locale, LEFTB delegates to LEFT.
73///
74/// ```yaml,sandbox
75/// title: "Left byte-compatible characters"
76/// formula: '=LEFTB("hello",2)'
77/// expected: "he"
78/// ```
79///
80/// ```yaml,docs
81/// related:
82///   - LEFT
83///   - RIGHTB
84///   - MIDB
85/// faq:
86///   - q: "Does LEFTB split UTF-8 bytes?"
87///     a: "No. It follows the non-DBCS behavior and delegates to LEFT."
88/// ```
89#[derive(Debug)]
90pub struct LeftBFn;
91/// [formualizer-docgen:schema:start]
92/// Name: LEFTB
93/// Type: LeftBFn
94/// Min args: 1
95/// Max args: variadic
96/// Variadic: true
97/// Signature: LEFTB(arg1...: any@scalar)
98/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
99/// Caps: PURE
100/// [formualizer-docgen:schema:end]
101impl Function for LeftBFn {
102    func_caps!(PURE);
103    fn name(&self) -> &'static str {
104        "LEFTB"
105    }
106    fn min_args(&self) -> usize {
107        1
108    }
109    fn variadic(&self) -> bool {
110        true
111    }
112    fn arg_schema(&self) -> &'static [ArgSchema] {
113        &ARG_ANY_ONE[..]
114    }
115    fn eval<'a, 'b, 'c>(
116        &self,
117        args: &'c [ArgumentHandle<'a, 'b>],
118        ctx: &dyn FunctionContext<'b>,
119    ) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
120        LeftFn.eval(args, ctx)
121    }
122}
123
124/// Returns text length using non-DBCS byte-compatible semantics.
125///
126/// In Formualizer's invariant locale, LENB delegates to LEN.
127///
128/// ```yaml,sandbox
129/// title: "Length of text"
130/// formula: '=LENB("abc")'
131/// expected: 3
132/// ```
133///
134/// ```yaml,docs
135/// related:
136///   - LEN
137///   - LEFTB
138///   - RIGHTB
139/// faq:
140///   - q: "Does LENB count UTF-8 bytes?"
141///     a: "No. In the invariant locale it matches LEN."
142/// ```
143#[derive(Debug)]
144pub struct LenBFn;
145/// [formualizer-docgen:schema:start]
146/// Name: LENB
147/// Type: LenBFn
148/// Min args: 1
149/// Max args: 1
150/// Variadic: false
151/// Signature: LENB(arg1: any@scalar)
152/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
153/// Caps: PURE
154/// [formualizer-docgen:schema:end]
155impl Function for LenBFn {
156    func_caps!(PURE);
157    fn name(&self) -> &'static str {
158        "LENB"
159    }
160    fn min_args(&self) -> usize {
161        1
162    }
163    fn arg_schema(&self) -> &'static [ArgSchema] {
164        &ARG_ANY_ONE[..]
165    }
166    fn eval<'a, 'b, 'c>(
167        &self,
168        args: &'c [ArgumentHandle<'a, 'b>],
169        ctx: &dyn FunctionContext<'b>,
170    ) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
171        LenFn.eval(args, ctx)
172    }
173}
174
175/// Extracts text from the middle using non-DBCS byte-compatible semantics.
176///
177/// In Formualizer's invariant locale, MIDB delegates to MID.
178///
179/// ```yaml,sandbox
180/// title: "Middle byte-compatible characters"
181/// formula: '=MIDB("abcdef",2,3)'
182/// expected: "bcd"
183/// ```
184///
185/// ```yaml,docs
186/// related:
187///   - MID
188///   - LEFTB
189///   - RIGHTB
190/// faq:
191///   - q: "How are byte positions interpreted?"
192///     a: "In the invariant locale, positions are character positions matching MID."
193/// ```
194#[derive(Debug)]
195pub struct MidBFn;
196/// [formualizer-docgen:schema:start]
197/// Name: MIDB
198/// Type: MidBFn
199/// Min args: 3
200/// Max args: 1
201/// Variadic: false
202/// Signature: MIDB(arg1: any@scalar)
203/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
204/// Caps: PURE
205/// [formualizer-docgen:schema:end]
206impl Function for MidBFn {
207    func_caps!(PURE);
208    fn name(&self) -> &'static str {
209        "MIDB"
210    }
211    fn min_args(&self) -> usize {
212        3
213    }
214    fn arg_schema(&self) -> &'static [ArgSchema] {
215        &ARG_ANY_ONE[..]
216    }
217    fn eval<'a, 'b, 'c>(
218        &self,
219        args: &'c [ArgumentHandle<'a, 'b>],
220        ctx: &dyn FunctionContext<'b>,
221    ) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
222        MidFn.eval(args, ctx)
223    }
224}
225
226/// Replaces text using non-DBCS byte-compatible semantics.
227///
228/// In Formualizer's invariant locale, REPLACEB delegates to REPLACE.
229///
230/// ```yaml,sandbox
231/// title: "Replace byte-compatible text"
232/// formula: '=REPLACEB("abcdef",3,2,"ZZ")'
233/// expected: "abZZef"
234/// ```
235///
236/// ```yaml,docs
237/// related:
238///   - REPLACE
239///   - MIDB
240///   - FINDB
241/// faq:
242///   - q: "Does REPLACEB operate on raw UTF-8 bytes?"
243///     a: "No. In the invariant locale it delegates to REPLACE."
244/// ```
245#[derive(Debug)]
246pub struct ReplaceBFn;
247/// [formualizer-docgen:schema:start]
248/// Name: REPLACEB
249/// Type: ReplaceBFn
250/// Min args: 4
251/// Max args: 1
252/// Variadic: false
253/// Signature: REPLACEB(arg1: any@scalar)
254/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
255/// Caps: PURE
256/// [formualizer-docgen:schema:end]
257impl Function for ReplaceBFn {
258    func_caps!(PURE);
259    fn name(&self) -> &'static str {
260        "REPLACEB"
261    }
262    fn min_args(&self) -> usize {
263        4
264    }
265    fn arg_schema(&self) -> &'static [ArgSchema] {
266        &ARG_ANY_ONE[..]
267    }
268    fn eval<'a, 'b, 'c>(
269        &self,
270        args: &'c [ArgumentHandle<'a, 'b>],
271        ctx: &dyn FunctionContext<'b>,
272    ) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
273        ReplaceFn.eval(args, ctx)
274    }
275}
276
277/// Returns the rightmost characters using non-DBCS byte-compatible semantics.
278///
279/// In Formualizer's invariant locale, RIGHTB delegates to RIGHT.
280///
281/// ```yaml,sandbox
282/// title: "Right byte-compatible characters"
283/// formula: '=RIGHTB("hello",2)'
284/// expected: "lo"
285/// ```
286///
287/// ```yaml,docs
288/// related:
289///   - RIGHT
290///   - LEFTB
291///   - MIDB
292/// faq:
293///   - q: "Does RIGHTB count DBCS bytes?"
294///     a: "Not currently. In the invariant locale it matches RIGHT."
295/// ```
296#[derive(Debug)]
297pub struct RightBFn;
298/// [formualizer-docgen:schema:start]
299/// Name: RIGHTB
300/// Type: RightBFn
301/// Min args: 1
302/// Max args: variadic
303/// Variadic: true
304/// Signature: RIGHTB(arg1...: any@scalar)
305/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
306/// Caps: PURE
307/// [formualizer-docgen:schema:end]
308impl Function for RightBFn {
309    func_caps!(PURE);
310    fn name(&self) -> &'static str {
311        "RIGHTB"
312    }
313    fn min_args(&self) -> usize {
314        1
315    }
316    fn variadic(&self) -> bool {
317        true
318    }
319    fn arg_schema(&self) -> &'static [ArgSchema] {
320        &ARG_ANY_ONE[..]
321    }
322    fn eval<'a, 'b, 'c>(
323        &self,
324        args: &'c [ArgumentHandle<'a, 'b>],
325        ctx: &dyn FunctionContext<'b>,
326    ) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
327        RightFn.eval(args, ctx)
328    }
329}
330
331/// Searches text using non-DBCS byte-compatible semantics.
332///
333/// In Formualizer's invariant locale, SEARCHB delegates to SEARCH and supports
334/// the same wildcard behavior.
335///
336/// ```yaml,sandbox
337/// title: "Wildcard byte-compatible search"
338/// formula: '=SEARCHB("d?f","abcDEF")'
339/// expected: 4
340/// ```
341///
342/// ```yaml,docs
343/// related:
344///   - SEARCH
345///   - FINDB
346///   - LENB
347/// faq:
348///   - q: "Is SEARCHB case-sensitive?"
349///     a: "No. It follows SEARCH behavior in the invariant locale."
350/// ```
351#[derive(Debug)]
352pub struct SearchBFn;
353/// [formualizer-docgen:schema:start]
354/// Name: SEARCHB
355/// Type: SearchBFn
356/// Min args: 2
357/// Max args: variadic
358/// Variadic: true
359/// Signature: SEARCHB(arg1...: any@scalar)
360/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
361/// Caps: PURE
362/// [formualizer-docgen:schema:end]
363impl Function for SearchBFn {
364    func_caps!(PURE);
365    fn name(&self) -> &'static str {
366        "SEARCHB"
367    }
368    fn min_args(&self) -> usize {
369        2
370    }
371    fn variadic(&self) -> bool {
372        true
373    }
374    fn arg_schema(&self) -> &'static [ArgSchema] {
375        &ARG_ANY_ONE[..]
376    }
377    fn eval<'a, 'b, 'c>(
378        &self,
379        args: &'c [ArgumentHandle<'a, 'b>],
380        ctx: &dyn FunctionContext<'b>,
381    ) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
382        SearchFn.eval(args, ctx)
383    }
384}
385
386pub fn register_builtins() {
387    use crate::function_registry::register_function;
388    use std::sync::Arc;
389
390    register_function(Arc::new(FindBFn));
391    register_function(Arc::new(LeftBFn));
392    register_function(Arc::new(LenBFn));
393    register_function(Arc::new(MidBFn));
394    register_function(Arc::new(ReplaceBFn));
395    register_function(Arc::new(RightBFn));
396    register_function(Arc::new(SearchBFn));
397}
398
#[cfg(test)]
mod tests {
    use super::{FindBFn, LeftBFn, LenBFn, MidBFn, ReplaceBFn, RightBFn, SearchBFn};
    use crate::test_workbook::TestWorkbook;
    use formualizer_common::LiteralValue;
    use formualizer_parse::parser::parse;
    use std::sync::Arc;

    /// Parses and evaluates `formula` on a fresh test workbook that has all seven
    /// `*B` functions installed, returning the result as a literal value.
    fn eval(formula: &str) -> LiteralValue {
        let workbook = TestWorkbook::new()
            .with_function(Arc::new(FindBFn))
            .with_function(Arc::new(LeftBFn))
            .with_function(Arc::new(LenBFn))
            .with_function(Arc::new(MidBFn))
            .with_function(Arc::new(ReplaceBFn))
            .with_function(Arc::new(RightBFn))
            .with_function(Arc::new(SearchBFn));
        let interpreter = workbook.interpreter();
        let parsed = parse(formula).expect("parse");
        let outcome = interpreter.evaluate_ast(&parsed).expect("eval");
        outcome.into_literal()
    }

    #[test]
    fn byte_functions_delegate_in_non_dbcs_locale() {
        // (formula, expected result) pairs, one per byte-oriented function.
        let cases = vec![
            ("=LENB(\"éx\")", LiteralValue::Int(2)),
            ("=LEFTB(\"hello\",2)", LiteralValue::Text("he".into())),
            ("=RIGHTB(\"hello\",2)", LiteralValue::Text("lo".into())),
            ("=MIDB(\"abcdef\",2,3)", LiteralValue::Text("bcd".into())),
            (
                "=REPLACEB(\"abcdef\",3,2,\"ZZ\")",
                LiteralValue::Text("abZZef".into()),
            ),
            ("=FINDB(\"CD\",\"abcDEFCD\")", LiteralValue::Int(7)),
            ("=SEARCHB(\"d?f\",\"abcDEF\")", LiteralValue::Int(4)),
        ];

        for (formula, expected) in cases {
            assert_eq!(eval(formula), expected, "formula: {}", formula);
        }
    }
}