Skip to main content

formualizer_eval/builtins/text/
char_code_rept.rs

//! CHAR, CODE, ASC, and REPT text functions
2
3use super::super::utils::{ARG_ANY_ONE, ARG_ANY_TWO, coerce_num};
4use crate::args::ArgSchema;
5use crate::function::Function;
6use crate::traits::{ArgumentHandle, CalcValue, FunctionContext};
7use formualizer_common::{ExcelError, ExcelErrorKind, LiteralValue};
8use formualizer_macros::func_caps;
9
10fn scalar_like_value(arg: &ArgumentHandle<'_, '_>) -> Result<LiteralValue, ExcelError> {
11    Ok(match arg.value()? {
12        CalcValue::Scalar(v) => v,
13        CalcValue::Range(rv) => rv.get_cell(0, 0),
14        CalcValue::Callable(_) => LiteralValue::Error(
15            ExcelError::new(ExcelErrorKind::Calc).with_message("LAMBDA value must be invoked"),
16        ),
17    })
18}
19
20/// CHAR(number) - Returns the character specified by a number
21/// Excel uses Windows-1252 encoding for codes 1-255
22#[derive(Debug)]
23pub struct CharFn;
24/// Returns the character represented by a numeric code.
25///
26/// `CHAR` follows Excel-style Windows-1252 behavior for codes `1..255`.
27///
28/// # Remarks
29/// - Input is truncated to an integer code.
30/// - Valid code range is `1` through `255`; outside this range returns `#VALUE!`.
31/// - Codes in the Windows-1252 extension range (128-159) are mapped to Unicode equivalents.
32/// - Errors are propagated unchanged.
33///
34/// # Examples
35///
36/// ```yaml,sandbox
37/// title: "ASCII character"
38/// formula: '=CHAR(65)'
39/// expected: "A"
40/// ```
41///
42/// ```yaml,sandbox
43/// title: "Out-of-range code"
44/// formula: '=CHAR(300)'
45/// expected: "#VALUE!"
46/// ```
47///
48/// ```yaml,docs
49/// related:
50///   - CODE
51///   - UNICHAR
52///   - UNICODE
53/// faq:
54///   - q: "Which character set does CHAR use for codes 128-159?"
55///     a: "It follows Excel-style Windows-1252 mappings, including extended symbols in that range."
56/// ```
57/// [formualizer-docgen:schema:start]
58/// Name: CHAR
59/// Type: CharFn
60/// Min args: 1
61/// Max args: 1
62/// Variadic: false
63/// Signature: CHAR(arg1: any@scalar)
64/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
65/// Caps: PURE
66/// [formualizer-docgen:schema:end]
67impl Function for CharFn {
68    func_caps!(PURE);
69    fn name(&self) -> &'static str {
70        "CHAR"
71    }
72    fn min_args(&self) -> usize {
73        1
74    }
75    fn arg_schema(&self) -> &'static [ArgSchema] {
76        &ARG_ANY_ONE[..]
77    }
78    fn eval<'a, 'b, 'c>(
79        &self,
80        args: &'c [ArgumentHandle<'a, 'b>],
81        _: &dyn FunctionContext<'b>,
82    ) -> Result<CalcValue<'b>, ExcelError> {
83        let v = scalar_like_value(&args[0])?;
84        let n = match v {
85            LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
86            other => coerce_num(&other)?,
87        };
88
89        let code = n.trunc() as i32;
90
91        // Excel CHAR accepts 1-255
92        if !(1..=255).contains(&code) {
93            return Ok(CalcValue::Scalar(LiteralValue::Error(
94                ExcelError::new_value(),
95            )));
96        }
97
98        // Windows-1252 to Unicode mapping for codes 128-159
99        let unicode_char = match code as u8 {
100            0x80 => '\u{20AC}', // Euro sign
101            0x82 => '\u{201A}', // Single low-9 quotation mark
102            0x83 => '\u{0192}', // Latin small letter f with hook
103            0x84 => '\u{201E}', // Double low-9 quotation mark
104            0x85 => '\u{2026}', // Horizontal ellipsis
105            0x86 => '\u{2020}', // Dagger
106            0x87 => '\u{2021}', // Double dagger
107            0x88 => '\u{02C6}', // Modifier letter circumflex accent
108            0x89 => '\u{2030}', // Per mille sign
109            0x8A => '\u{0160}', // Latin capital letter S with caron
110            0x8B => '\u{2039}', // Single left-pointing angle quotation mark
111            0x8C => '\u{0152}', // Latin capital ligature OE
112            0x8E => '\u{017D}', // Latin capital letter Z with caron
113            0x91 => '\u{2018}', // Left single quotation mark
114            0x92 => '\u{2019}', // Right single quotation mark
115            0x93 => '\u{201C}', // Left double quotation mark
116            0x94 => '\u{201D}', // Right double quotation mark
117            0x95 => '\u{2022}', // Bullet
118            0x96 => '\u{2013}', // En dash
119            0x97 => '\u{2014}', // Em dash
120            0x98 => '\u{02DC}', // Small tilde
121            0x99 => '\u{2122}', // Trade mark sign
122            0x9A => '\u{0161}', // Latin small letter s with caron
123            0x9B => '\u{203A}', // Single right-pointing angle quotation mark
124            0x9C => '\u{0153}', // Latin small ligature oe
125            0x9E => '\u{017E}', // Latin small letter z with caron
126            0x9F => '\u{0178}', // Latin capital letter Y with diaeresis
127            0x81 | 0x8D | 0x8F | 0x90 | 0x9D => {
128                // Undefined in Windows-1252, return placeholder
129                '\u{FFFD}'
130            }
131            c => char::from(c),
132        };
133
134        Ok(CalcValue::Scalar(LiteralValue::Text(
135            unicode_char.to_string(),
136        )))
137    }
138}
139
140/// CODE(text) - Returns a numeric code for the first character in a text string
141#[derive(Debug)]
142pub struct CodeFn;
143/// Returns the numeric code of the first character in text.
144///
145/// `CODE` mirrors Excel behavior with Windows-1252 compatibility mappings.
146///
147/// # Remarks
148/// - Only the first character is inspected.
149/// - Empty text returns `#VALUE!`.
150/// - Text-like coercion is applied to non-text scalar inputs.
151/// - Known Unicode characters in the Windows-1252 extension map back to their Excel codes.
152///
153/// # Examples
154///
155/// ```yaml,sandbox
156/// title: "ASCII code"
157/// formula: '=CODE("A")'
158/// expected: 65
159/// ```
160///
161/// ```yaml,sandbox
162/// title: "Extended mapping"
163/// formula: '=CODE(CHAR(128))'
164/// expected: 128
165/// ```
166///
167/// ```yaml,docs
168/// related:
169///   - CHAR
170///   - UNICODE
171///   - UNICHAR
172/// faq:
173///   - q: "What if the input text is empty?"
174///     a: "CODE returns #VALUE! because there is no first character to evaluate."
175/// ```
176/// [formualizer-docgen:schema:start]
177/// Name: CODE
178/// Type: CodeFn
179/// Min args: 1
180/// Max args: 1
181/// Variadic: false
182/// Signature: CODE(arg1: any@scalar)
183/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
184/// Caps: PURE
185/// [formualizer-docgen:schema:end]
186impl Function for CodeFn {
187    func_caps!(PURE);
188    fn name(&self) -> &'static str {
189        "CODE"
190    }
191    fn min_args(&self) -> usize {
192        1
193    }
194    fn arg_schema(&self) -> &'static [ArgSchema] {
195        &ARG_ANY_ONE[..]
196    }
197    fn eval<'a, 'b, 'c>(
198        &self,
199        args: &'c [ArgumentHandle<'a, 'b>],
200        _: &dyn FunctionContext<'b>,
201    ) -> Result<CalcValue<'b>, ExcelError> {
202        let v = scalar_like_value(&args[0])?;
203        let s = match v {
204            LiteralValue::Text(t) => t,
205            LiteralValue::Empty => {
206                return Ok(CalcValue::Scalar(LiteralValue::Error(
207                    ExcelError::new_value(),
208                )));
209            }
210            LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
211            other => other.to_string(),
212        };
213
214        if s.is_empty() {
215            return Ok(CalcValue::Scalar(LiteralValue::Error(
216                ExcelError::new_value(),
217            )));
218        }
219
220        let first_char = s.chars().next().unwrap();
221
222        // Map Unicode back to Windows-1252 for Excel compatibility
223        let code = match first_char {
224            '\u{20AC}' => 0x80, // Euro sign
225            '\u{201A}' => 0x82, // Single low-9 quotation mark
226            '\u{0192}' => 0x83, // Latin small letter f with hook
227            '\u{201E}' => 0x84, // Double low-9 quotation mark
228            '\u{2026}' => 0x85, // Horizontal ellipsis
229            '\u{2020}' => 0x86, // Dagger
230            '\u{2021}' => 0x87, // Double dagger
231            '\u{02C6}' => 0x88, // Modifier letter circumflex accent
232            '\u{2030}' => 0x89, // Per mille sign
233            '\u{0160}' => 0x8A, // Latin capital letter S with caron
234            '\u{2039}' => 0x8B, // Single left-pointing angle quotation mark
235            '\u{0152}' => 0x8C, // Latin capital ligature OE
236            '\u{017D}' => 0x8E, // Latin capital letter Z with caron
237            '\u{2018}' => 0x91, // Left single quotation mark
238            '\u{2019}' => 0x92, // Right single quotation mark
239            '\u{201C}' => 0x93, // Left double quotation mark
240            '\u{201D}' => 0x94, // Right double quotation mark
241            '\u{2022}' => 0x95, // Bullet
242            '\u{2013}' => 0x96, // En dash
243            '\u{2014}' => 0x97, // Em dash
244            '\u{02DC}' => 0x98, // Small tilde
245            '\u{2122}' => 0x99, // Trade mark sign
246            '\u{0161}' => 0x9A, // Latin small letter s with caron
247            '\u{203A}' => 0x9B, // Single right-pointing angle quotation mark
248            '\u{0153}' => 0x9C, // Latin small ligature oe
249            '\u{017E}' => 0x9E, // Latin small letter z with caron
250            '\u{0178}' => 0x9F, // Latin capital letter Y with diaeresis
251            c if (c as u32) < 256 => c as i64,
252            c => c as i64, // For characters outside Windows-1252, return Unicode code point
253        };
254
255        Ok(CalcValue::Scalar(LiteralValue::Int(code)))
256    }
257}
258
/// Maps full-width ASCII forms (U+FF01..=U+FF5E) and the ideographic space (U+3000)
/// to their half-width ASCII counterparts; every other character is copied through.
fn asc_convert(text: &str) -> String {
    // Distance between a full-width form and its ASCII twin (0xFF01 - 0x21).
    const FULLWIDTH_OFFSET: u32 = 0xFEE0;

    let mut narrowed = String::with_capacity(text.len());
    for ch in text.chars() {
        let mapped = match ch {
            '\u{3000}' => ' ',
            '\u{FF01}'..='\u{FF5E}' => char::from_u32(ch as u32 - FULLWIDTH_OFFSET).unwrap_or(ch),
            _ => ch,
        };
        narrowed.push(mapped);
    }
    narrowed
}
273
274/// Converts full-width Latin and ASCII characters to half-width text.
275///
276/// Maps full-width ASCII punctuation, digits, letters, and ideographic space to
277/// their half-width equivalents while leaving other characters unchanged.
278///
279/// ```yaml,sandbox
280/// title: "Convert full-width letters and digits"
281/// formula: '=ASC("ABC123")'
282/// expected: "ABC123"
283/// ```
284///
285/// ```yaml,sandbox
286/// title: "Convert ideographic space"
287/// formula: '=ASC("A B")'
288/// expected: "A B"
289/// ```
290///
291/// ```yaml,docs
292/// related:
293///   - CHAR
294///   - CODE
295///   - UNICHAR
296/// faq:
297///   - q: "Are non-ASCII full-width characters transliterated?"
298///     a: "No. ASC only maps the full-width ASCII block and ideographic space."
299/// ```
300#[derive(Debug)]
301pub struct AscFn;
302/// [formualizer-docgen:schema:start]
303/// Name: ASC
304/// Type: AscFn
305/// Min args: 1
306/// Max args: 1
307/// Variadic: false
308/// Signature: ASC(arg1: any@scalar)
309/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
310/// Caps: PURE
311/// [formualizer-docgen:schema:end]
312impl Function for AscFn {
313    func_caps!(PURE);
314    fn name(&self) -> &'static str {
315        "ASC"
316    }
317    fn min_args(&self) -> usize {
318        1
319    }
320    fn arg_schema(&self) -> &'static [ArgSchema] {
321        &ARG_ANY_ONE[..]
322    }
323    fn eval<'a, 'b, 'c>(
324        &self,
325        args: &'c [ArgumentHandle<'a, 'b>],
326        _: &dyn FunctionContext<'b>,
327    ) -> Result<CalcValue<'b>, ExcelError> {
328        if args.len() != 1 {
329            return Ok(CalcValue::Scalar(LiteralValue::Error(
330                ExcelError::new_value(),
331            )));
332        }
333        let v = scalar_like_value(&args[0])?;
334        let s = match v {
335            LiteralValue::Text(t) => t,
336            LiteralValue::Empty => String::new(),
337            LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
338            other => other.to_string(),
339        };
340        Ok(CalcValue::Scalar(LiteralValue::Text(asc_convert(&s))))
341    }
342}
343
344/// REPT(text, number_times) - Repeats text a given number of times
345#[derive(Debug)]
346pub struct ReptFn;
347/// Repeats a text string a specified number of times.
348///
349/// # Remarks
350/// - Repeat count is truncated to an integer.
351/// - Negative counts return `#VALUE!`.
352/// - Output longer than 32,767 characters returns `#VALUE!`.
353/// - Non-text first argument is coerced to text.
354///
355/// # Examples
356///
357/// ```yaml,sandbox
358/// title: "Repeat text three times"
359/// formula: '=REPT("ab", 3)'
360/// expected: "ababab"
361/// ```
362///
363/// ```yaml,sandbox
364/// title: "Negative count"
365/// formula: '=REPT("x", -1)'
366/// expected: "#VALUE!"
367/// ```
368///
369/// ```yaml,docs
370/// related:
371///   - CONCAT
372///   - TEXTJOIN
373///   - SUBSTITUTE
374/// faq:
375///   - q: "Can REPT return very long strings?"
376///     a: "Only up to 32,767 characters; longer results return #VALUE! like Excel."
377/// ```
378/// [formualizer-docgen:schema:start]
379/// Name: REPT
380/// Type: ReptFn
381/// Min args: 2
382/// Max args: 2
383/// Variadic: false
384/// Signature: REPT(arg1: any@scalar, arg2: any@scalar)
385/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}; arg2{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
386/// Caps: PURE
387/// [formualizer-docgen:schema:end]
388impl Function for ReptFn {
389    func_caps!(PURE);
390    fn name(&self) -> &'static str {
391        "REPT"
392    }
393    fn min_args(&self) -> usize {
394        2
395    }
396    fn arg_schema(&self) -> &'static [ArgSchema] {
397        &ARG_ANY_TWO[..]
398    }
399    fn eval<'a, 'b, 'c>(
400        &self,
401        args: &'c [ArgumentHandle<'a, 'b>],
402        _: &dyn FunctionContext<'b>,
403    ) -> Result<CalcValue<'b>, ExcelError> {
404        let text_val = scalar_like_value(&args[0])?;
405        let count_val = scalar_like_value(&args[1])?;
406
407        let text = match text_val {
408            LiteralValue::Text(t) => t,
409            LiteralValue::Empty => String::new(),
410            LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
411            other => other.to_string(),
412        };
413
414        let count = match count_val {
415            LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
416            other => coerce_num(&other)?,
417        };
418
419        let count = count.trunc() as i64;
420
421        if count < 0 {
422            return Ok(CalcValue::Scalar(LiteralValue::Error(
423                ExcelError::new_value(),
424            )));
425        }
426
427        // Excel limits result to 32767 characters
428        let max_result_len = 32767;
429        let result_len = text.len() * (count as usize);
430        if result_len > max_result_len {
431            return Ok(CalcValue::Scalar(LiteralValue::Error(
432                ExcelError::new_value(),
433            )));
434        }
435
436        let result = text.repeat(count as usize);
437        Ok(CalcValue::Scalar(LiteralValue::Text(result)))
438    }
439}
440
441pub fn register_builtins() {
442    use std::sync::Arc;
443    crate::function_registry::register_function(Arc::new(CharFn));
444    crate::function_registry::register_function(Arc::new(CodeFn));
445    crate::function_registry::register_function(Arc::new(AscFn));
446    crate::function_registry::register_function(Arc::new(ReptFn));
447}
448
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_workbook::TestWorkbook;
    use crate::traits::ArgumentHandle;
    use formualizer_parse::parser::{ASTNode, ASTNodeType};

    /// Builds an interpreter over the given test workbook.
    fn interp(wb: &TestWorkbook) -> crate::interpreter::Interpreter<'_> {
        wb.interpreter()
    }
    /// Wraps a literal value in an AST node so it can be passed as an argument.
    fn lit(v: LiteralValue) -> ASTNode {
        ASTNode::new(ASTNodeType::Literal(v), None)
    }

    #[test]
    fn char_basic() {
        let wb = TestWorkbook::new().with_function(std::sync::Arc::new(CharFn));
        let ctx = interp(&wb);
        let n = lit(LiteralValue::Number(65.0));
        let f = ctx.context.get_function("", "CHAR").unwrap();
        assert_eq!(
            f.dispatch(
                &[ArgumentHandle::new(&n, &ctx)],
                &ctx.function_context(None)
            )
            .unwrap()
            .into_literal(),
            LiteralValue::Text("A".to_string())
        );
    }

    #[test]
    fn code_basic() {
        let wb = TestWorkbook::new().with_function(std::sync::Arc::new(CodeFn));
        let ctx = interp(&wb);
        let s = lit(LiteralValue::Text("A".to_string()));
        let f = ctx.context.get_function("", "CODE").unwrap();
        assert_eq!(
            f.dispatch(
                &[ArgumentHandle::new(&s, &ctx)],
                &ctx.function_context(None)
            )
            .unwrap()
            .into_literal(),
            LiteralValue::Int(65)
        );
    }

    #[test]
    fn asc_converts_full_width_ascii_and_space() {
        let wb = TestWorkbook::new().with_function(std::sync::Arc::new(AscFn));
        let ctx = interp(&wb);
        // Full-width "ABC123!", an ideographic space, then full-width "x". Written with
        // escapes so the input cannot be silently normalised to plain ASCII, which would
        // make this test pass without exercising any conversion.
        let s = lit(LiteralValue::Text(
            "\u{FF21}\u{FF22}\u{FF23}\u{FF11}\u{FF12}\u{FF13}\u{FF01}\u{3000}\u{FF58}".to_string(),
        ));
        let f = ctx.context.get_function("", "ASC").unwrap();
        assert_eq!(
            f.dispatch(
                &[ArgumentHandle::new(&s, &ctx)],
                &ctx.function_context(None)
            )
            .unwrap()
            .into_literal(),
            LiteralValue::Text("ABC123! x".to_string())
        );
    }

    #[test]
    fn rept_basic() {
        let wb = TestWorkbook::new().with_function(std::sync::Arc::new(ReptFn));
        let ctx = interp(&wb);
        let s = lit(LiteralValue::Text("ab".to_string()));
        let n = lit(LiteralValue::Number(3.0));
        let f = ctx.context.get_function("", "REPT").unwrap();
        assert_eq!(
            f.dispatch(
                &[ArgumentHandle::new(&s, &ctx), ArgumentHandle::new(&n, &ctx)],
                &ctx.function_context(None)
            )
            .unwrap()
            .into_literal(),
            LiteralValue::Text("ababab".to_string())
        );
    }
}