formualizer_eval/builtins/text/char_code_rept.rs
1//! CHAR, CODE, REPT text functions
2
3use super::super::utils::{ARG_ANY_ONE, ARG_ANY_TWO, coerce_num};
4use crate::args::ArgSchema;
5use crate::function::Function;
6use crate::traits::{ArgumentHandle, CalcValue, FunctionContext};
7use formualizer_common::{ExcelError, ExcelErrorKind, LiteralValue};
8use formualizer_macros::func_caps;
9
10fn scalar_like_value(arg: &ArgumentHandle<'_, '_>) -> Result<LiteralValue, ExcelError> {
11 Ok(match arg.value()? {
12 CalcValue::Scalar(v) => v,
13 CalcValue::Range(rv) => rv.get_cell(0, 0),
14 CalcValue::Callable(_) => LiteralValue::Error(
15 ExcelError::new(ExcelErrorKind::Calc).with_message("LAMBDA value must be invoked"),
16 ),
17 })
18}
19
20/// CHAR(number) - Returns the character specified by a number
21/// Excel uses Windows-1252 encoding for codes 1-255
22#[derive(Debug)]
23pub struct CharFn;
24/// Returns the character represented by a numeric code.
25///
26/// `CHAR` follows Excel-style Windows-1252 behavior for codes `1..255`.
27///
28/// # Remarks
29/// - Input is truncated to an integer code.
30/// - Valid code range is `1` through `255`; outside this range returns `#VALUE!`.
31/// - Codes in the Windows-1252 extension range (128-159) are mapped to Unicode equivalents.
32/// - Errors are propagated unchanged.
33///
34/// # Examples
35///
36/// ```yaml,sandbox
37/// title: "ASCII character"
38/// formula: '=CHAR(65)'
39/// expected: "A"
40/// ```
41///
42/// ```yaml,sandbox
43/// title: "Out-of-range code"
44/// formula: '=CHAR(300)'
45/// expected: "#VALUE!"
46/// ```
47///
48/// ```yaml,docs
49/// related:
50/// - CODE
51/// - UNICHAR
52/// - UNICODE
53/// faq:
54/// - q: "Which character set does CHAR use for codes 128-159?"
55/// a: "It follows Excel-style Windows-1252 mappings, including extended symbols in that range."
56/// ```
57/// [formualizer-docgen:schema:start]
58/// Name: CHAR
59/// Type: CharFn
60/// Min args: 1
61/// Max args: 1
62/// Variadic: false
63/// Signature: CHAR(arg1: any@scalar)
64/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
65/// Caps: PURE
66/// [formualizer-docgen:schema:end]
67impl Function for CharFn {
68 func_caps!(PURE);
69 fn name(&self) -> &'static str {
70 "CHAR"
71 }
72 fn min_args(&self) -> usize {
73 1
74 }
75 fn arg_schema(&self) -> &'static [ArgSchema] {
76 &ARG_ANY_ONE[..]
77 }
78 fn eval<'a, 'b, 'c>(
79 &self,
80 args: &'c [ArgumentHandle<'a, 'b>],
81 _: &dyn FunctionContext<'b>,
82 ) -> Result<CalcValue<'b>, ExcelError> {
83 let v = scalar_like_value(&args[0])?;
84 let n = match v {
85 LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
86 other => coerce_num(&other)?,
87 };
88
89 let code = n.trunc() as i32;
90
91 // Excel CHAR accepts 1-255
92 if !(1..=255).contains(&code) {
93 return Ok(CalcValue::Scalar(LiteralValue::Error(
94 ExcelError::new_value(),
95 )));
96 }
97
98 // Windows-1252 to Unicode mapping for codes 128-159
99 let unicode_char = match code as u8 {
100 0x80 => '\u{20AC}', // Euro sign
101 0x82 => '\u{201A}', // Single low-9 quotation mark
102 0x83 => '\u{0192}', // Latin small letter f with hook
103 0x84 => '\u{201E}', // Double low-9 quotation mark
104 0x85 => '\u{2026}', // Horizontal ellipsis
105 0x86 => '\u{2020}', // Dagger
106 0x87 => '\u{2021}', // Double dagger
107 0x88 => '\u{02C6}', // Modifier letter circumflex accent
108 0x89 => '\u{2030}', // Per mille sign
109 0x8A => '\u{0160}', // Latin capital letter S with caron
110 0x8B => '\u{2039}', // Single left-pointing angle quotation mark
111 0x8C => '\u{0152}', // Latin capital ligature OE
112 0x8E => '\u{017D}', // Latin capital letter Z with caron
113 0x91 => '\u{2018}', // Left single quotation mark
114 0x92 => '\u{2019}', // Right single quotation mark
115 0x93 => '\u{201C}', // Left double quotation mark
116 0x94 => '\u{201D}', // Right double quotation mark
117 0x95 => '\u{2022}', // Bullet
118 0x96 => '\u{2013}', // En dash
119 0x97 => '\u{2014}', // Em dash
120 0x98 => '\u{02DC}', // Small tilde
121 0x99 => '\u{2122}', // Trade mark sign
122 0x9A => '\u{0161}', // Latin small letter s with caron
123 0x9B => '\u{203A}', // Single right-pointing angle quotation mark
124 0x9C => '\u{0153}', // Latin small ligature oe
125 0x9E => '\u{017E}', // Latin small letter z with caron
126 0x9F => '\u{0178}', // Latin capital letter Y with diaeresis
127 0x81 | 0x8D | 0x8F | 0x90 | 0x9D => {
128 // Undefined in Windows-1252, return placeholder
129 '\u{FFFD}'
130 }
131 c => char::from(c),
132 };
133
134 Ok(CalcValue::Scalar(LiteralValue::Text(
135 unicode_char.to_string(),
136 )))
137 }
138}
139
140/// CODE(text) - Returns a numeric code for the first character in a text string
141#[derive(Debug)]
142pub struct CodeFn;
143/// Returns the numeric code of the first character in text.
144///
145/// `CODE` mirrors Excel behavior with Windows-1252 compatibility mappings.
146///
147/// # Remarks
148/// - Only the first character is inspected.
149/// - Empty text returns `#VALUE!`.
150/// - Text-like coercion is applied to non-text scalar inputs.
151/// - Known Unicode characters in the Windows-1252 extension map back to their Excel codes.
152///
153/// # Examples
154///
155/// ```yaml,sandbox
156/// title: "ASCII code"
157/// formula: '=CODE("A")'
158/// expected: 65
159/// ```
160///
161/// ```yaml,sandbox
162/// title: "Extended mapping"
163/// formula: '=CODE(CHAR(128))'
164/// expected: 128
165/// ```
166///
167/// ```yaml,docs
168/// related:
169/// - CHAR
170/// - UNICODE
171/// - UNICHAR
172/// faq:
173/// - q: "What if the input text is empty?"
174/// a: "CODE returns #VALUE! because there is no first character to evaluate."
175/// ```
176/// [formualizer-docgen:schema:start]
177/// Name: CODE
178/// Type: CodeFn
179/// Min args: 1
180/// Max args: 1
181/// Variadic: false
182/// Signature: CODE(arg1: any@scalar)
183/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
184/// Caps: PURE
185/// [formualizer-docgen:schema:end]
186impl Function for CodeFn {
187 func_caps!(PURE);
188 fn name(&self) -> &'static str {
189 "CODE"
190 }
191 fn min_args(&self) -> usize {
192 1
193 }
194 fn arg_schema(&self) -> &'static [ArgSchema] {
195 &ARG_ANY_ONE[..]
196 }
197 fn eval<'a, 'b, 'c>(
198 &self,
199 args: &'c [ArgumentHandle<'a, 'b>],
200 _: &dyn FunctionContext<'b>,
201 ) -> Result<CalcValue<'b>, ExcelError> {
202 let v = scalar_like_value(&args[0])?;
203 let s = match v {
204 LiteralValue::Text(t) => t,
205 LiteralValue::Empty => {
206 return Ok(CalcValue::Scalar(LiteralValue::Error(
207 ExcelError::new_value(),
208 )));
209 }
210 LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
211 other => other.to_string(),
212 };
213
214 if s.is_empty() {
215 return Ok(CalcValue::Scalar(LiteralValue::Error(
216 ExcelError::new_value(),
217 )));
218 }
219
220 let first_char = s.chars().next().unwrap();
221
222 // Map Unicode back to Windows-1252 for Excel compatibility
223 let code = match first_char {
224 '\u{20AC}' => 0x80, // Euro sign
225 '\u{201A}' => 0x82, // Single low-9 quotation mark
226 '\u{0192}' => 0x83, // Latin small letter f with hook
227 '\u{201E}' => 0x84, // Double low-9 quotation mark
228 '\u{2026}' => 0x85, // Horizontal ellipsis
229 '\u{2020}' => 0x86, // Dagger
230 '\u{2021}' => 0x87, // Double dagger
231 '\u{02C6}' => 0x88, // Modifier letter circumflex accent
232 '\u{2030}' => 0x89, // Per mille sign
233 '\u{0160}' => 0x8A, // Latin capital letter S with caron
234 '\u{2039}' => 0x8B, // Single left-pointing angle quotation mark
235 '\u{0152}' => 0x8C, // Latin capital ligature OE
236 '\u{017D}' => 0x8E, // Latin capital letter Z with caron
237 '\u{2018}' => 0x91, // Left single quotation mark
238 '\u{2019}' => 0x92, // Right single quotation mark
239 '\u{201C}' => 0x93, // Left double quotation mark
240 '\u{201D}' => 0x94, // Right double quotation mark
241 '\u{2022}' => 0x95, // Bullet
242 '\u{2013}' => 0x96, // En dash
243 '\u{2014}' => 0x97, // Em dash
244 '\u{02DC}' => 0x98, // Small tilde
245 '\u{2122}' => 0x99, // Trade mark sign
246 '\u{0161}' => 0x9A, // Latin small letter s with caron
247 '\u{203A}' => 0x9B, // Single right-pointing angle quotation mark
248 '\u{0153}' => 0x9C, // Latin small ligature oe
249 '\u{017E}' => 0x9E, // Latin small letter z with caron
250 '\u{0178}' => 0x9F, // Latin capital letter Y with diaeresis
251 c if (c as u32) < 256 => c as i64,
252 c => c as i64, // For characters outside Windows-1252, return Unicode code point
253 };
254
255 Ok(CalcValue::Scalar(LiteralValue::Int(code)))
256 }
257}
258
259/// REPT(text, number_times) - Repeats text a given number of times
260#[derive(Debug)]
261pub struct ReptFn;
262/// Repeats a text string a specified number of times.
263///
264/// # Remarks
265/// - Repeat count is truncated to an integer.
266/// - Negative counts return `#VALUE!`.
267/// - Output longer than 32,767 characters returns `#VALUE!`.
268/// - Non-text first argument is coerced to text.
269///
270/// # Examples
271///
272/// ```yaml,sandbox
273/// title: "Repeat text three times"
274/// formula: '=REPT("ab", 3)'
275/// expected: "ababab"
276/// ```
277///
278/// ```yaml,sandbox
279/// title: "Negative count"
280/// formula: '=REPT("x", -1)'
281/// expected: "#VALUE!"
282/// ```
283///
284/// ```yaml,docs
285/// related:
286/// - CONCAT
287/// - TEXTJOIN
288/// - SUBSTITUTE
289/// faq:
290/// - q: "Can REPT return very long strings?"
291/// a: "Only up to 32,767 characters; longer results return #VALUE! like Excel."
292/// ```
293/// [formualizer-docgen:schema:start]
294/// Name: REPT
295/// Type: ReptFn
296/// Min args: 2
297/// Max args: 2
298/// Variadic: false
299/// Signature: REPT(arg1: any@scalar, arg2: any@scalar)
300/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}; arg2{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
301/// Caps: PURE
302/// [formualizer-docgen:schema:end]
303impl Function for ReptFn {
304 func_caps!(PURE);
305 fn name(&self) -> &'static str {
306 "REPT"
307 }
308 fn min_args(&self) -> usize {
309 2
310 }
311 fn arg_schema(&self) -> &'static [ArgSchema] {
312 &ARG_ANY_TWO[..]
313 }
314 fn eval<'a, 'b, 'c>(
315 &self,
316 args: &'c [ArgumentHandle<'a, 'b>],
317 _: &dyn FunctionContext<'b>,
318 ) -> Result<CalcValue<'b>, ExcelError> {
319 let text_val = scalar_like_value(&args[0])?;
320 let count_val = scalar_like_value(&args[1])?;
321
322 let text = match text_val {
323 LiteralValue::Text(t) => t,
324 LiteralValue::Empty => String::new(),
325 LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
326 other => other.to_string(),
327 };
328
329 let count = match count_val {
330 LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
331 other => coerce_num(&other)?,
332 };
333
334 let count = count.trunc() as i64;
335
336 if count < 0 {
337 return Ok(CalcValue::Scalar(LiteralValue::Error(
338 ExcelError::new_value(),
339 )));
340 }
341
342 // Excel limits result to 32767 characters
343 let max_result_len = 32767;
344 let result_len = text.len() * (count as usize);
345 if result_len > max_result_len {
346 return Ok(CalcValue::Scalar(LiteralValue::Error(
347 ExcelError::new_value(),
348 )));
349 }
350
351 let result = text.repeat(count as usize);
352 Ok(CalcValue::Scalar(LiteralValue::Text(result)))
353 }
354}
355
356pub fn register_builtins() {
357 use std::sync::Arc;
358 crate::function_registry::register_function(Arc::new(CharFn));
359 crate::function_registry::register_function(Arc::new(CodeFn));
360 crate::function_registry::register_function(Arc::new(ReptFn));
361}
362
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_workbook::TestWorkbook;
    use crate::traits::ArgumentHandle;
    use formualizer_parse::parser::{ASTNode, ASTNodeType};
    use std::sync::Arc;

    /// Builds an interpreter over the given test workbook.
    fn interp(wb: &TestWorkbook) -> crate::interpreter::Interpreter<'_> {
        wb.interpreter()
    }

    /// Wraps a literal value in an AST node.
    fn lit(v: LiteralValue) -> ASTNode {
        ASTNode::new(ASTNodeType::Literal(v), None)
    }

    /// `CHAR(65)` yields `"A"`.
    #[test]
    fn char_basic() {
        let wb = TestWorkbook::new().with_function(Arc::new(CharFn));
        let ctx = interp(&wb);
        let code_node = lit(LiteralValue::Number(65.0));
        let char_fn = ctx.context.get_function("", "CHAR").unwrap();

        let actual = char_fn
            .dispatch(
                &[ArgumentHandle::new(&code_node, &ctx)],
                &ctx.function_context(None),
            )
            .unwrap()
            .into_literal();

        assert_eq!(actual, LiteralValue::Text("A".to_string()));
    }

    /// `CODE("A")` yields `65`.
    #[test]
    fn code_basic() {
        let wb = TestWorkbook::new().with_function(Arc::new(CodeFn));
        let ctx = interp(&wb);
        let text_node = lit(LiteralValue::Text("A".to_string()));
        let code_fn = ctx.context.get_function("", "CODE").unwrap();

        let actual = code_fn
            .dispatch(
                &[ArgumentHandle::new(&text_node, &ctx)],
                &ctx.function_context(None),
            )
            .unwrap()
            .into_literal();

        assert_eq!(actual, LiteralValue::Int(65));
    }

    /// `REPT("ab", 3)` yields `"ababab"`.
    #[test]
    fn rept_basic() {
        let wb = TestWorkbook::new().with_function(Arc::new(ReptFn));
        let ctx = interp(&wb);
        let text_node = lit(LiteralValue::Text("ab".to_string()));
        let count_node = lit(LiteralValue::Number(3.0));
        let rept_fn = ctx.context.get_function("", "REPT").unwrap();

        let actual = rept_fn
            .dispatch(
                &[
                    ArgumentHandle::new(&text_node, &ctx),
                    ArgumentHandle::new(&count_node, &ctx),
                ],
                &ctx.function_context(None),
            )
            .unwrap()
            .into_literal();

        assert_eq!(actual, LiteralValue::Text("ababab".to_string()));
    }
}