formualizer_eval/builtins/text/byte.rs
1//! Byte-oriented text functions for non-DBCS locales.
2//!
3//! Excel's `*B` functions count bytes in double-byte character set locales. Formualizer's locale
4//! layer is currently invariant/non-DBCS, so these functions delegate to their character-counting
5//! counterparts.
6
7use super::{FindFn, LeftFn, LenFn, MidFn, ReplaceFn, RightFn, SearchFn};
8use crate::args::ArgSchema;
9use crate::builtins::utils::ARG_ANY_ONE;
10use crate::function::Function;
11use crate::traits::{ArgumentHandle, FunctionContext};
12use formualizer_common::ExcelError;
13use formualizer_macros::func_caps;
14
15/// Finds text within text using non-DBCS byte-compatible semantics.
16///
17/// In Formualizer's invariant locale, FINDB delegates to FIND and counts Unicode
18/// scalar characters rather than double-byte locale bytes.
19///
20/// ```yaml,sandbox
21/// title: "Find byte-compatible text"
22/// formula: '=FINDB("CD","abcDEFCD")'
23/// expected: 7
24/// ```
25///
26/// ```yaml,docs
27/// related:
28/// - FIND
29/// - SEARCHB
30/// - LENB
31/// faq:
32/// - q: "Does FINDB use DBCS byte counts?"
33/// a: "Not currently. In the invariant locale it delegates to FIND."
34/// ```
35#[derive(Debug)]
36pub struct FindBFn;
37/// [formualizer-docgen:schema:start]
38/// Name: FINDB
39/// Type: FindBFn
40/// Min args: 2
41/// Max args: variadic
42/// Variadic: true
43/// Signature: FINDB(arg1...: any@scalar)
44/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
45/// Caps: PURE
46/// [formualizer-docgen:schema:end]
47impl Function for FindBFn {
48 func_caps!(PURE);
49 fn name(&self) -> &'static str {
50 "FINDB"
51 }
52 fn min_args(&self) -> usize {
53 2
54 }
55 fn variadic(&self) -> bool {
56 true
57 }
58 fn arg_schema(&self) -> &'static [ArgSchema] {
59 &ARG_ANY_ONE[..]
60 }
61 fn eval<'a, 'b, 'c>(
62 &self,
63 args: &'c [ArgumentHandle<'a, 'b>],
64 ctx: &dyn FunctionContext<'b>,
65 ) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
66 FindFn.eval(args, ctx)
67 }
68}
69
70/// Returns the leftmost characters using non-DBCS byte-compatible semantics.
71///
72/// In Formualizer's invariant locale, LEFTB delegates to LEFT.
73///
74/// ```yaml,sandbox
75/// title: "Left byte-compatible characters"
76/// formula: '=LEFTB("hello",2)'
77/// expected: "he"
78/// ```
79///
80/// ```yaml,docs
81/// related:
82/// - LEFT
83/// - RIGHTB
84/// - MIDB
85/// faq:
86/// - q: "Does LEFTB split UTF-8 bytes?"
87/// a: "No. It follows the non-DBCS behavior and delegates to LEFT."
88/// ```
89#[derive(Debug)]
90pub struct LeftBFn;
91/// [formualizer-docgen:schema:start]
92/// Name: LEFTB
93/// Type: LeftBFn
94/// Min args: 1
95/// Max args: variadic
96/// Variadic: true
97/// Signature: LEFTB(arg1...: any@scalar)
98/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
99/// Caps: PURE
100/// [formualizer-docgen:schema:end]
101impl Function for LeftBFn {
102 func_caps!(PURE);
103 fn name(&self) -> &'static str {
104 "LEFTB"
105 }
106 fn min_args(&self) -> usize {
107 1
108 }
109 fn variadic(&self) -> bool {
110 true
111 }
112 fn arg_schema(&self) -> &'static [ArgSchema] {
113 &ARG_ANY_ONE[..]
114 }
115 fn eval<'a, 'b, 'c>(
116 &self,
117 args: &'c [ArgumentHandle<'a, 'b>],
118 ctx: &dyn FunctionContext<'b>,
119 ) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
120 LeftFn.eval(args, ctx)
121 }
122}
123
124/// Returns text length using non-DBCS byte-compatible semantics.
125///
126/// In Formualizer's invariant locale, LENB delegates to LEN.
127///
128/// ```yaml,sandbox
129/// title: "Length of text"
130/// formula: '=LENB("abc")'
131/// expected: 3
132/// ```
133///
134/// ```yaml,docs
135/// related:
136/// - LEN
137/// - LEFTB
138/// - RIGHTB
139/// faq:
140/// - q: "Does LENB count UTF-8 bytes?"
141/// a: "No. In the invariant locale it matches LEN."
142/// ```
143#[derive(Debug)]
144pub struct LenBFn;
145/// [formualizer-docgen:schema:start]
146/// Name: LENB
147/// Type: LenBFn
148/// Min args: 1
149/// Max args: 1
150/// Variadic: false
151/// Signature: LENB(arg1: any@scalar)
152/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
153/// Caps: PURE
154/// [formualizer-docgen:schema:end]
155impl Function for LenBFn {
156 func_caps!(PURE);
157 fn name(&self) -> &'static str {
158 "LENB"
159 }
160 fn min_args(&self) -> usize {
161 1
162 }
163 fn arg_schema(&self) -> &'static [ArgSchema] {
164 &ARG_ANY_ONE[..]
165 }
166 fn eval<'a, 'b, 'c>(
167 &self,
168 args: &'c [ArgumentHandle<'a, 'b>],
169 ctx: &dyn FunctionContext<'b>,
170 ) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
171 LenFn.eval(args, ctx)
172 }
173}
174
175/// Extracts text from the middle using non-DBCS byte-compatible semantics.
176///
177/// In Formualizer's invariant locale, MIDB delegates to MID.
178///
179/// ```yaml,sandbox
180/// title: "Middle byte-compatible characters"
181/// formula: '=MIDB("abcdef",2,3)'
182/// expected: "bcd"
183/// ```
184///
185/// ```yaml,docs
186/// related:
187/// - MID
188/// - LEFTB
189/// - RIGHTB
190/// faq:
191/// - q: "How are byte positions interpreted?"
192/// a: "In the invariant locale, positions are character positions matching MID."
193/// ```
194#[derive(Debug)]
195pub struct MidBFn;
196/// [formualizer-docgen:schema:start]
197/// Name: MIDB
198/// Type: MidBFn
199/// Min args: 3
200/// Max args: 1
201/// Variadic: false
202/// Signature: MIDB(arg1: any@scalar)
203/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
204/// Caps: PURE
205/// [formualizer-docgen:schema:end]
206impl Function for MidBFn {
207 func_caps!(PURE);
208 fn name(&self) -> &'static str {
209 "MIDB"
210 }
211 fn min_args(&self) -> usize {
212 3
213 }
214 fn arg_schema(&self) -> &'static [ArgSchema] {
215 &ARG_ANY_ONE[..]
216 }
217 fn eval<'a, 'b, 'c>(
218 &self,
219 args: &'c [ArgumentHandle<'a, 'b>],
220 ctx: &dyn FunctionContext<'b>,
221 ) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
222 MidFn.eval(args, ctx)
223 }
224}
225
226/// Replaces text using non-DBCS byte-compatible semantics.
227///
228/// In Formualizer's invariant locale, REPLACEB delegates to REPLACE.
229///
230/// ```yaml,sandbox
231/// title: "Replace byte-compatible text"
232/// formula: '=REPLACEB("abcdef",3,2,"ZZ")'
233/// expected: "abZZef"
234/// ```
235///
236/// ```yaml,docs
237/// related:
238/// - REPLACE
239/// - MIDB
240/// - FINDB
241/// faq:
242/// - q: "Does REPLACEB operate on raw UTF-8 bytes?"
243/// a: "No. In the invariant locale it delegates to REPLACE."
244/// ```
245#[derive(Debug)]
246pub struct ReplaceBFn;
247/// [formualizer-docgen:schema:start]
248/// Name: REPLACEB
249/// Type: ReplaceBFn
250/// Min args: 4
251/// Max args: 1
252/// Variadic: false
253/// Signature: REPLACEB(arg1: any@scalar)
254/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
255/// Caps: PURE
256/// [formualizer-docgen:schema:end]
257impl Function for ReplaceBFn {
258 func_caps!(PURE);
259 fn name(&self) -> &'static str {
260 "REPLACEB"
261 }
262 fn min_args(&self) -> usize {
263 4
264 }
265 fn arg_schema(&self) -> &'static [ArgSchema] {
266 &ARG_ANY_ONE[..]
267 }
268 fn eval<'a, 'b, 'c>(
269 &self,
270 args: &'c [ArgumentHandle<'a, 'b>],
271 ctx: &dyn FunctionContext<'b>,
272 ) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
273 ReplaceFn.eval(args, ctx)
274 }
275}
276
277/// Returns the rightmost characters using non-DBCS byte-compatible semantics.
278///
279/// In Formualizer's invariant locale, RIGHTB delegates to RIGHT.
280///
281/// ```yaml,sandbox
282/// title: "Right byte-compatible characters"
283/// formula: '=RIGHTB("hello",2)'
284/// expected: "lo"
285/// ```
286///
287/// ```yaml,docs
288/// related:
289/// - RIGHT
290/// - LEFTB
291/// - MIDB
292/// faq:
293/// - q: "Does RIGHTB count DBCS bytes?"
294/// a: "Not currently. In the invariant locale it matches RIGHT."
295/// ```
296#[derive(Debug)]
297pub struct RightBFn;
298/// [formualizer-docgen:schema:start]
299/// Name: RIGHTB
300/// Type: RightBFn
301/// Min args: 1
302/// Max args: variadic
303/// Variadic: true
304/// Signature: RIGHTB(arg1...: any@scalar)
305/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
306/// Caps: PURE
307/// [formualizer-docgen:schema:end]
308impl Function for RightBFn {
309 func_caps!(PURE);
310 fn name(&self) -> &'static str {
311 "RIGHTB"
312 }
313 fn min_args(&self) -> usize {
314 1
315 }
316 fn variadic(&self) -> bool {
317 true
318 }
319 fn arg_schema(&self) -> &'static [ArgSchema] {
320 &ARG_ANY_ONE[..]
321 }
322 fn eval<'a, 'b, 'c>(
323 &self,
324 args: &'c [ArgumentHandle<'a, 'b>],
325 ctx: &dyn FunctionContext<'b>,
326 ) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
327 RightFn.eval(args, ctx)
328 }
329}
330
331/// Searches text using non-DBCS byte-compatible semantics.
332///
333/// In Formualizer's invariant locale, SEARCHB delegates to SEARCH and supports
334/// the same wildcard behavior.
335///
336/// ```yaml,sandbox
337/// title: "Wildcard byte-compatible search"
338/// formula: '=SEARCHB("d?f","abcDEF")'
339/// expected: 4
340/// ```
341///
342/// ```yaml,docs
343/// related:
344/// - SEARCH
345/// - FINDB
346/// - LENB
347/// faq:
348/// - q: "Is SEARCHB case-sensitive?"
349/// a: "No. It follows SEARCH behavior in the invariant locale."
350/// ```
351#[derive(Debug)]
352pub struct SearchBFn;
353/// [formualizer-docgen:schema:start]
354/// Name: SEARCHB
355/// Type: SearchBFn
356/// Min args: 2
357/// Max args: variadic
358/// Variadic: true
359/// Signature: SEARCHB(arg1...: any@scalar)
360/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
361/// Caps: PURE
362/// [formualizer-docgen:schema:end]
363impl Function for SearchBFn {
364 func_caps!(PURE);
365 fn name(&self) -> &'static str {
366 "SEARCHB"
367 }
368 fn min_args(&self) -> usize {
369 2
370 }
371 fn variadic(&self) -> bool {
372 true
373 }
374 fn arg_schema(&self) -> &'static [ArgSchema] {
375 &ARG_ANY_ONE[..]
376 }
377 fn eval<'a, 'b, 'c>(
378 &self,
379 args: &'c [ArgumentHandle<'a, 'b>],
380 ctx: &dyn FunctionContext<'b>,
381 ) -> Result<crate::traits::CalcValue<'b>, ExcelError> {
382 SearchFn.eval(args, ctx)
383 }
384}
385
386pub fn register_builtins() {
387 use crate::function_registry::register_function;
388 use std::sync::Arc;
389
390 register_function(Arc::new(FindBFn));
391 register_function(Arc::new(LeftBFn));
392 register_function(Arc::new(LenBFn));
393 register_function(Arc::new(MidBFn));
394 register_function(Arc::new(ReplaceBFn));
395 register_function(Arc::new(RightBFn));
396 register_function(Arc::new(SearchBFn));
397}
398
#[cfg(test)]
mod tests {
    use super::{FindBFn, LeftBFn, LenBFn, MidBFn, ReplaceBFn, RightBFn, SearchBFn};
    use crate::test_workbook::TestWorkbook;
    use formualizer_common::LiteralValue;
    use formualizer_parse::parser::parse;
    use std::sync::Arc;

    /// Parses `formula` and evaluates it on a test workbook that has all seven
    /// byte-variant functions attached, returning the result as a literal.
    fn eval(formula: &str) -> LiteralValue {
        let ast = parse(formula).expect("parse");
        let workbook = TestWorkbook::new()
            .with_function(Arc::new(FindBFn))
            .with_function(Arc::new(LeftBFn))
            .with_function(Arc::new(LenBFn))
            .with_function(Arc::new(MidBFn))
            .with_function(Arc::new(ReplaceBFn))
            .with_function(Arc::new(RightBFn))
            .with_function(Arc::new(SearchBFn));
        // Keep the interpreter in a named binding so it outlives the evaluation call.
        let interpreter = workbook.interpreter();
        interpreter.evaluate_ast(&ast).expect("eval").into_literal()
    }

    #[test]
    fn byte_functions_delegate_in_non_dbcs_locale() {
        // Each formula is paired with the value its character-counting counterpart yields.
        let cases = [
            (r#"=LENB("éx")"#, LiteralValue::Int(2)),
            (r#"=LEFTB("hello",2)"#, LiteralValue::Text("he".into())),
            (r#"=RIGHTB("hello",2)"#, LiteralValue::Text("lo".into())),
            (r#"=MIDB("abcdef",2,3)"#, LiteralValue::Text("bcd".into())),
            (
                r#"=REPLACEB("abcdef",3,2,"ZZ")"#,
                LiteralValue::Text("abZZef".into()),
            ),
            (r#"=FINDB("CD","abcDEFCD")"#, LiteralValue::Int(7)),
            (r#"=SEARCHB("d?f","abcDEF")"#, LiteralValue::Int(4)),
        ];

        for (formula, expected) in &cases {
            assert_eq!(eval(formula), *expected, "formula: {}", formula);
        }
    }
}
438}