Skip to main content

formualizer_eval/builtins/text/
char_code_rept.rs

1//! CHAR, CODE, REPT text functions
2
3use super::super::utils::{ARG_ANY_ONE, ARG_ANY_TWO, coerce_num};
4use crate::args::ArgSchema;
5use crate::function::Function;
6use crate::traits::{ArgumentHandle, CalcValue, FunctionContext};
7use formualizer_common::{ExcelError, ExcelErrorKind, LiteralValue};
8use formualizer_macros::func_caps;
9
10fn scalar_like_value(arg: &ArgumentHandle<'_, '_>) -> Result<LiteralValue, ExcelError> {
11    Ok(match arg.value()? {
12        CalcValue::Scalar(v) => v,
13        CalcValue::Range(rv) => rv.get_cell(0, 0),
14        CalcValue::Callable(_) => LiteralValue::Error(
15            ExcelError::new(ExcelErrorKind::Calc).with_message("LAMBDA value must be invoked"),
16        ),
17    })
18}
19
20/// CHAR(number) - Returns the character specified by a number
21/// Excel uses Windows-1252 encoding for codes 1-255
22#[derive(Debug)]
23pub struct CharFn;
24/// Returns the character represented by a numeric code.
25///
26/// `CHAR` follows Excel-style Windows-1252 behavior for codes `1..255`.
27///
28/// # Remarks
29/// - Input is truncated to an integer code.
30/// - Valid code range is `1` through `255`; outside this range returns `#VALUE!`.
31/// - Codes in the Windows-1252 extension range (128-159) are mapped to Unicode equivalents.
32/// - Errors are propagated unchanged.
33///
34/// # Examples
35///
36/// ```yaml,sandbox
37/// title: "ASCII character"
38/// formula: '=CHAR(65)'
39/// expected: "A"
40/// ```
41///
42/// ```yaml,sandbox
43/// title: "Out-of-range code"
44/// formula: '=CHAR(300)'
45/// expected: "#VALUE!"
46/// ```
47///
48/// ```yaml,docs
49/// related:
50///   - CODE
51///   - UNICHAR
52///   - UNICODE
53/// faq:
54///   - q: "Which character set does CHAR use for codes 128-159?"
55///     a: "It follows Excel-style Windows-1252 mappings, including extended symbols in that range."
56/// ```
57/// [formualizer-docgen:schema:start]
58/// Name: CHAR
59/// Type: CharFn
60/// Min args: 1
61/// Max args: 1
62/// Variadic: false
63/// Signature: CHAR(arg1: any@scalar)
64/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
65/// Caps: PURE
66/// [formualizer-docgen:schema:end]
67impl Function for CharFn {
68    func_caps!(PURE);
69    fn name(&self) -> &'static str {
70        "CHAR"
71    }
72    fn min_args(&self) -> usize {
73        1
74    }
75    fn arg_schema(&self) -> &'static [ArgSchema] {
76        &ARG_ANY_ONE[..]
77    }
78    fn eval<'a, 'b, 'c>(
79        &self,
80        args: &'c [ArgumentHandle<'a, 'b>],
81        _: &dyn FunctionContext<'b>,
82    ) -> Result<CalcValue<'b>, ExcelError> {
83        let v = scalar_like_value(&args[0])?;
84        let n = match v {
85            LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
86            other => coerce_num(&other)?,
87        };
88
89        let code = n.trunc() as i32;
90
91        // Excel CHAR accepts 1-255
92        if !(1..=255).contains(&code) {
93            return Ok(CalcValue::Scalar(LiteralValue::Error(
94                ExcelError::new_value(),
95            )));
96        }
97
98        // Windows-1252 to Unicode mapping for codes 128-159
99        let unicode_char = match code as u8 {
100            0x80 => '\u{20AC}', // Euro sign
101            0x82 => '\u{201A}', // Single low-9 quotation mark
102            0x83 => '\u{0192}', // Latin small letter f with hook
103            0x84 => '\u{201E}', // Double low-9 quotation mark
104            0x85 => '\u{2026}', // Horizontal ellipsis
105            0x86 => '\u{2020}', // Dagger
106            0x87 => '\u{2021}', // Double dagger
107            0x88 => '\u{02C6}', // Modifier letter circumflex accent
108            0x89 => '\u{2030}', // Per mille sign
109            0x8A => '\u{0160}', // Latin capital letter S with caron
110            0x8B => '\u{2039}', // Single left-pointing angle quotation mark
111            0x8C => '\u{0152}', // Latin capital ligature OE
112            0x8E => '\u{017D}', // Latin capital letter Z with caron
113            0x91 => '\u{2018}', // Left single quotation mark
114            0x92 => '\u{2019}', // Right single quotation mark
115            0x93 => '\u{201C}', // Left double quotation mark
116            0x94 => '\u{201D}', // Right double quotation mark
117            0x95 => '\u{2022}', // Bullet
118            0x96 => '\u{2013}', // En dash
119            0x97 => '\u{2014}', // Em dash
120            0x98 => '\u{02DC}', // Small tilde
121            0x99 => '\u{2122}', // Trade mark sign
122            0x9A => '\u{0161}', // Latin small letter s with caron
123            0x9B => '\u{203A}', // Single right-pointing angle quotation mark
124            0x9C => '\u{0153}', // Latin small ligature oe
125            0x9E => '\u{017E}', // Latin small letter z with caron
126            0x9F => '\u{0178}', // Latin capital letter Y with diaeresis
127            0x81 | 0x8D | 0x8F | 0x90 | 0x9D => {
128                // Undefined in Windows-1252, return placeholder
129                '\u{FFFD}'
130            }
131            c => char::from(c),
132        };
133
134        Ok(CalcValue::Scalar(LiteralValue::Text(
135            unicode_char.to_string(),
136        )))
137    }
138}
139
140/// CODE(text) - Returns a numeric code for the first character in a text string
141#[derive(Debug)]
142pub struct CodeFn;
143/// Returns the numeric code of the first character in text.
144///
145/// `CODE` mirrors Excel behavior with Windows-1252 compatibility mappings.
146///
147/// # Remarks
148/// - Only the first character is inspected.
149/// - Empty text returns `#VALUE!`.
150/// - Text-like coercion is applied to non-text scalar inputs.
151/// - Known Unicode characters in the Windows-1252 extension map back to their Excel codes.
152///
153/// # Examples
154///
155/// ```yaml,sandbox
156/// title: "ASCII code"
157/// formula: '=CODE("A")'
158/// expected: 65
159/// ```
160///
161/// ```yaml,sandbox
162/// title: "Extended mapping"
163/// formula: '=CODE(CHAR(128))'
164/// expected: 128
165/// ```
166///
167/// ```yaml,docs
168/// related:
169///   - CHAR
170///   - UNICODE
171///   - UNICHAR
172/// faq:
173///   - q: "What if the input text is empty?"
174///     a: "CODE returns #VALUE! because there is no first character to evaluate."
175/// ```
176/// [formualizer-docgen:schema:start]
177/// Name: CODE
178/// Type: CodeFn
179/// Min args: 1
180/// Max args: 1
181/// Variadic: false
182/// Signature: CODE(arg1: any@scalar)
183/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
184/// Caps: PURE
185/// [formualizer-docgen:schema:end]
186impl Function for CodeFn {
187    func_caps!(PURE);
188    fn name(&self) -> &'static str {
189        "CODE"
190    }
191    fn min_args(&self) -> usize {
192        1
193    }
194    fn arg_schema(&self) -> &'static [ArgSchema] {
195        &ARG_ANY_ONE[..]
196    }
197    fn eval<'a, 'b, 'c>(
198        &self,
199        args: &'c [ArgumentHandle<'a, 'b>],
200        _: &dyn FunctionContext<'b>,
201    ) -> Result<CalcValue<'b>, ExcelError> {
202        let v = scalar_like_value(&args[0])?;
203        let s = match v {
204            LiteralValue::Text(t) => t,
205            LiteralValue::Empty => {
206                return Ok(CalcValue::Scalar(LiteralValue::Error(
207                    ExcelError::new_value(),
208                )));
209            }
210            LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
211            other => other.to_string(),
212        };
213
214        if s.is_empty() {
215            return Ok(CalcValue::Scalar(LiteralValue::Error(
216                ExcelError::new_value(),
217            )));
218        }
219
220        let first_char = s.chars().next().unwrap();
221
222        // Map Unicode back to Windows-1252 for Excel compatibility
223        let code = match first_char {
224            '\u{20AC}' => 0x80, // Euro sign
225            '\u{201A}' => 0x82, // Single low-9 quotation mark
226            '\u{0192}' => 0x83, // Latin small letter f with hook
227            '\u{201E}' => 0x84, // Double low-9 quotation mark
228            '\u{2026}' => 0x85, // Horizontal ellipsis
229            '\u{2020}' => 0x86, // Dagger
230            '\u{2021}' => 0x87, // Double dagger
231            '\u{02C6}' => 0x88, // Modifier letter circumflex accent
232            '\u{2030}' => 0x89, // Per mille sign
233            '\u{0160}' => 0x8A, // Latin capital letter S with caron
234            '\u{2039}' => 0x8B, // Single left-pointing angle quotation mark
235            '\u{0152}' => 0x8C, // Latin capital ligature OE
236            '\u{017D}' => 0x8E, // Latin capital letter Z with caron
237            '\u{2018}' => 0x91, // Left single quotation mark
238            '\u{2019}' => 0x92, // Right single quotation mark
239            '\u{201C}' => 0x93, // Left double quotation mark
240            '\u{201D}' => 0x94, // Right double quotation mark
241            '\u{2022}' => 0x95, // Bullet
242            '\u{2013}' => 0x96, // En dash
243            '\u{2014}' => 0x97, // Em dash
244            '\u{02DC}' => 0x98, // Small tilde
245            '\u{2122}' => 0x99, // Trade mark sign
246            '\u{0161}' => 0x9A, // Latin small letter s with caron
247            '\u{203A}' => 0x9B, // Single right-pointing angle quotation mark
248            '\u{0153}' => 0x9C, // Latin small ligature oe
249            '\u{017E}' => 0x9E, // Latin small letter z with caron
250            '\u{0178}' => 0x9F, // Latin capital letter Y with diaeresis
251            c if (c as u32) < 256 => c as i64,
252            c => c as i64, // For characters outside Windows-1252, return Unicode code point
253        };
254
255        Ok(CalcValue::Scalar(LiteralValue::Int(code)))
256    }
257}
258
259/// REPT(text, number_times) - Repeats text a given number of times
260#[derive(Debug)]
261pub struct ReptFn;
262/// Repeats a text string a specified number of times.
263///
264/// # Remarks
265/// - Repeat count is truncated to an integer.
266/// - Negative counts return `#VALUE!`.
267/// - Output longer than 32,767 characters returns `#VALUE!`.
268/// - Non-text first argument is coerced to text.
269///
270/// # Examples
271///
272/// ```yaml,sandbox
273/// title: "Repeat text three times"
274/// formula: '=REPT("ab", 3)'
275/// expected: "ababab"
276/// ```
277///
278/// ```yaml,sandbox
279/// title: "Negative count"
280/// formula: '=REPT("x", -1)'
281/// expected: "#VALUE!"
282/// ```
283///
284/// ```yaml,docs
285/// related:
286///   - CONCAT
287///   - TEXTJOIN
288///   - SUBSTITUTE
289/// faq:
290///   - q: "Can REPT return very long strings?"
291///     a: "Only up to 32,767 characters; longer results return #VALUE! like Excel."
292/// ```
293/// [formualizer-docgen:schema:start]
294/// Name: REPT
295/// Type: ReptFn
296/// Min args: 2
297/// Max args: 2
298/// Variadic: false
299/// Signature: REPT(arg1: any@scalar, arg2: any@scalar)
300/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}; arg2{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
301/// Caps: PURE
302/// [formualizer-docgen:schema:end]
303impl Function for ReptFn {
304    func_caps!(PURE);
305    fn name(&self) -> &'static str {
306        "REPT"
307    }
308    fn min_args(&self) -> usize {
309        2
310    }
311    fn arg_schema(&self) -> &'static [ArgSchema] {
312        &ARG_ANY_TWO[..]
313    }
314    fn eval<'a, 'b, 'c>(
315        &self,
316        args: &'c [ArgumentHandle<'a, 'b>],
317        _: &dyn FunctionContext<'b>,
318    ) -> Result<CalcValue<'b>, ExcelError> {
319        let text_val = scalar_like_value(&args[0])?;
320        let count_val = scalar_like_value(&args[1])?;
321
322        let text = match text_val {
323            LiteralValue::Text(t) => t,
324            LiteralValue::Empty => String::new(),
325            LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
326            other => other.to_string(),
327        };
328
329        let count = match count_val {
330            LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
331            other => coerce_num(&other)?,
332        };
333
334        let count = count.trunc() as i64;
335
336        if count < 0 {
337            return Ok(CalcValue::Scalar(LiteralValue::Error(
338                ExcelError::new_value(),
339            )));
340        }
341
342        // Excel limits result to 32767 characters
343        let max_result_len = 32767;
344        let result_len = text.len() * (count as usize);
345        if result_len > max_result_len {
346            return Ok(CalcValue::Scalar(LiteralValue::Error(
347                ExcelError::new_value(),
348            )));
349        }
350
351        let result = text.repeat(count as usize);
352        Ok(CalcValue::Scalar(LiteralValue::Text(result)))
353    }
354}
355
356pub fn register_builtins() {
357    use std::sync::Arc;
358    crate::function_registry::register_function(Arc::new(CharFn));
359    crate::function_registry::register_function(Arc::new(CodeFn));
360    crate::function_registry::register_function(Arc::new(ReptFn));
361}
362
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_workbook::TestWorkbook;
    use crate::traits::ArgumentHandle;
    use formualizer_parse::parser::{ASTNode, ASTNodeType};
    use std::sync::Arc;

    /// Builds an interpreter over the given test workbook.
    fn interp(wb: &TestWorkbook) -> crate::interpreter::Interpreter<'_> {
        wb.interpreter()
    }

    /// Wraps a literal value in an AST node so it can be passed as an argument.
    fn lit(v: LiteralValue) -> ASTNode {
        ASTNode::new(ASTNodeType::Literal(v), None)
    }

    /// `CHAR(65)` yields "A".
    #[test]
    fn char_basic() {
        let workbook = TestWorkbook::new().with_function(Arc::new(CharFn));
        let ctx = interp(&workbook);
        let code_node = lit(LiteralValue::Number(65.0));
        let func = ctx.context.get_function("", "CHAR").unwrap();

        let got = func
            .dispatch(
                &[ArgumentHandle::new(&code_node, &ctx)],
                &ctx.function_context(None),
            )
            .unwrap()
            .into_literal();

        assert_eq!(got, LiteralValue::Text("A".to_string()));
    }

    /// `CODE("A")` yields 65.
    #[test]
    fn code_basic() {
        let workbook = TestWorkbook::new().with_function(Arc::new(CodeFn));
        let ctx = interp(&workbook);
        let text_node = lit(LiteralValue::Text("A".to_string()));
        let func = ctx.context.get_function("", "CODE").unwrap();

        let got = func
            .dispatch(
                &[ArgumentHandle::new(&text_node, &ctx)],
                &ctx.function_context(None),
            )
            .unwrap()
            .into_literal();

        assert_eq!(got, LiteralValue::Int(65));
    }

    /// `REPT("ab", 3)` yields "ababab".
    #[test]
    fn rept_basic() {
        let workbook = TestWorkbook::new().with_function(Arc::new(ReptFn));
        let ctx = interp(&workbook);
        let text_node = lit(LiteralValue::Text("ab".to_string()));
        let count_node = lit(LiteralValue::Number(3.0));
        let func = ctx.context.get_function("", "REPT").unwrap();

        let got = func
            .dispatch(
                &[
                    ArgumentHandle::new(&text_node, &ctx),
                    ArgumentHandle::new(&count_node, &ctx),
                ],
                &ctx.function_context(None),
            )
            .unwrap()
            .into_literal();

        assert_eq!(got, LiteralValue::Text("ababab".to_string()));
    }
}