formualizer_eval/builtins/text/char_code_rept.rs
//! CHAR, CODE, ASC, and REPT text functions
2
3use super::super::utils::{ARG_ANY_ONE, ARG_ANY_TWO, coerce_num};
4use crate::args::ArgSchema;
5use crate::function::Function;
6use crate::traits::{ArgumentHandle, CalcValue, FunctionContext};
7use formualizer_common::{ExcelError, ExcelErrorKind, LiteralValue};
8use formualizer_macros::func_caps;
9
10fn scalar_like_value(arg: &ArgumentHandle<'_, '_>) -> Result<LiteralValue, ExcelError> {
11 Ok(match arg.value()? {
12 CalcValue::Scalar(v) => v,
13 CalcValue::Range(rv) => rv.get_cell(0, 0),
14 CalcValue::Callable(_) => LiteralValue::Error(
15 ExcelError::new(ExcelErrorKind::Calc).with_message("LAMBDA value must be invoked"),
16 ),
17 })
18}
19
20/// CHAR(number) - Returns the character specified by a number
21/// Excel uses Windows-1252 encoding for codes 1-255
22#[derive(Debug)]
23pub struct CharFn;
24/// Returns the character represented by a numeric code.
25///
26/// `CHAR` follows Excel-style Windows-1252 behavior for codes `1..255`.
27///
28/// # Remarks
29/// - Input is truncated to an integer code.
30/// - Valid code range is `1` through `255`; outside this range returns `#VALUE!`.
31/// - Codes in the Windows-1252 extension range (128-159) are mapped to Unicode equivalents.
32/// - Errors are propagated unchanged.
33///
34/// # Examples
35///
36/// ```yaml,sandbox
37/// title: "ASCII character"
38/// formula: '=CHAR(65)'
39/// expected: "A"
40/// ```
41///
42/// ```yaml,sandbox
43/// title: "Out-of-range code"
44/// formula: '=CHAR(300)'
45/// expected: "#VALUE!"
46/// ```
47///
48/// ```yaml,docs
49/// related:
50/// - CODE
51/// - UNICHAR
52/// - UNICODE
53/// faq:
54/// - q: "Which character set does CHAR use for codes 128-159?"
55/// a: "It follows Excel-style Windows-1252 mappings, including extended symbols in that range."
56/// ```
57/// [formualizer-docgen:schema:start]
58/// Name: CHAR
59/// Type: CharFn
60/// Min args: 1
61/// Max args: 1
62/// Variadic: false
63/// Signature: CHAR(arg1: any@scalar)
64/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
65/// Caps: PURE
66/// [formualizer-docgen:schema:end]
67impl Function for CharFn {
68 func_caps!(PURE);
69 fn name(&self) -> &'static str {
70 "CHAR"
71 }
72 fn min_args(&self) -> usize {
73 1
74 }
75 fn arg_schema(&self) -> &'static [ArgSchema] {
76 &ARG_ANY_ONE[..]
77 }
78 fn eval<'a, 'b, 'c>(
79 &self,
80 args: &'c [ArgumentHandle<'a, 'b>],
81 _: &dyn FunctionContext<'b>,
82 ) -> Result<CalcValue<'b>, ExcelError> {
83 let v = scalar_like_value(&args[0])?;
84 let n = match v {
85 LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
86 other => coerce_num(&other)?,
87 };
88
89 let code = n.trunc() as i32;
90
91 // Excel CHAR accepts 1-255
92 if !(1..=255).contains(&code) {
93 return Ok(CalcValue::Scalar(LiteralValue::Error(
94 ExcelError::new_value(),
95 )));
96 }
97
98 // Windows-1252 to Unicode mapping for codes 128-159
99 let unicode_char = match code as u8 {
100 0x80 => '\u{20AC}', // Euro sign
101 0x82 => '\u{201A}', // Single low-9 quotation mark
102 0x83 => '\u{0192}', // Latin small letter f with hook
103 0x84 => '\u{201E}', // Double low-9 quotation mark
104 0x85 => '\u{2026}', // Horizontal ellipsis
105 0x86 => '\u{2020}', // Dagger
106 0x87 => '\u{2021}', // Double dagger
107 0x88 => '\u{02C6}', // Modifier letter circumflex accent
108 0x89 => '\u{2030}', // Per mille sign
109 0x8A => '\u{0160}', // Latin capital letter S with caron
110 0x8B => '\u{2039}', // Single left-pointing angle quotation mark
111 0x8C => '\u{0152}', // Latin capital ligature OE
112 0x8E => '\u{017D}', // Latin capital letter Z with caron
113 0x91 => '\u{2018}', // Left single quotation mark
114 0x92 => '\u{2019}', // Right single quotation mark
115 0x93 => '\u{201C}', // Left double quotation mark
116 0x94 => '\u{201D}', // Right double quotation mark
117 0x95 => '\u{2022}', // Bullet
118 0x96 => '\u{2013}', // En dash
119 0x97 => '\u{2014}', // Em dash
120 0x98 => '\u{02DC}', // Small tilde
121 0x99 => '\u{2122}', // Trade mark sign
122 0x9A => '\u{0161}', // Latin small letter s with caron
123 0x9B => '\u{203A}', // Single right-pointing angle quotation mark
124 0x9C => '\u{0153}', // Latin small ligature oe
125 0x9E => '\u{017E}', // Latin small letter z with caron
126 0x9F => '\u{0178}', // Latin capital letter Y with diaeresis
127 0x81 | 0x8D | 0x8F | 0x90 | 0x9D => {
128 // Undefined in Windows-1252, return placeholder
129 '\u{FFFD}'
130 }
131 c => char::from(c),
132 };
133
134 Ok(CalcValue::Scalar(LiteralValue::Text(
135 unicode_char.to_string(),
136 )))
137 }
138}
139
140/// CODE(text) - Returns a numeric code for the first character in a text string
141#[derive(Debug)]
142pub struct CodeFn;
143/// Returns the numeric code of the first character in text.
144///
145/// `CODE` mirrors Excel behavior with Windows-1252 compatibility mappings.
146///
147/// # Remarks
148/// - Only the first character is inspected.
149/// - Empty text returns `#VALUE!`.
150/// - Text-like coercion is applied to non-text scalar inputs.
151/// - Known Unicode characters in the Windows-1252 extension map back to their Excel codes.
152///
153/// # Examples
154///
155/// ```yaml,sandbox
156/// title: "ASCII code"
157/// formula: '=CODE("A")'
158/// expected: 65
159/// ```
160///
161/// ```yaml,sandbox
162/// title: "Extended mapping"
163/// formula: '=CODE(CHAR(128))'
164/// expected: 128
165/// ```
166///
167/// ```yaml,docs
168/// related:
169/// - CHAR
170/// - UNICODE
171/// - UNICHAR
172/// faq:
173/// - q: "What if the input text is empty?"
174/// a: "CODE returns #VALUE! because there is no first character to evaluate."
175/// ```
176/// [formualizer-docgen:schema:start]
177/// Name: CODE
178/// Type: CodeFn
179/// Min args: 1
180/// Max args: 1
181/// Variadic: false
182/// Signature: CODE(arg1: any@scalar)
183/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
184/// Caps: PURE
185/// [formualizer-docgen:schema:end]
186impl Function for CodeFn {
187 func_caps!(PURE);
188 fn name(&self) -> &'static str {
189 "CODE"
190 }
191 fn min_args(&self) -> usize {
192 1
193 }
194 fn arg_schema(&self) -> &'static [ArgSchema] {
195 &ARG_ANY_ONE[..]
196 }
197 fn eval<'a, 'b, 'c>(
198 &self,
199 args: &'c [ArgumentHandle<'a, 'b>],
200 _: &dyn FunctionContext<'b>,
201 ) -> Result<CalcValue<'b>, ExcelError> {
202 let v = scalar_like_value(&args[0])?;
203 let s = match v {
204 LiteralValue::Text(t) => t,
205 LiteralValue::Empty => {
206 return Ok(CalcValue::Scalar(LiteralValue::Error(
207 ExcelError::new_value(),
208 )));
209 }
210 LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
211 other => other.to_string(),
212 };
213
214 if s.is_empty() {
215 return Ok(CalcValue::Scalar(LiteralValue::Error(
216 ExcelError::new_value(),
217 )));
218 }
219
220 let first_char = s.chars().next().unwrap();
221
222 // Map Unicode back to Windows-1252 for Excel compatibility
223 let code = match first_char {
224 '\u{20AC}' => 0x80, // Euro sign
225 '\u{201A}' => 0x82, // Single low-9 quotation mark
226 '\u{0192}' => 0x83, // Latin small letter f with hook
227 '\u{201E}' => 0x84, // Double low-9 quotation mark
228 '\u{2026}' => 0x85, // Horizontal ellipsis
229 '\u{2020}' => 0x86, // Dagger
230 '\u{2021}' => 0x87, // Double dagger
231 '\u{02C6}' => 0x88, // Modifier letter circumflex accent
232 '\u{2030}' => 0x89, // Per mille sign
233 '\u{0160}' => 0x8A, // Latin capital letter S with caron
234 '\u{2039}' => 0x8B, // Single left-pointing angle quotation mark
235 '\u{0152}' => 0x8C, // Latin capital ligature OE
236 '\u{017D}' => 0x8E, // Latin capital letter Z with caron
237 '\u{2018}' => 0x91, // Left single quotation mark
238 '\u{2019}' => 0x92, // Right single quotation mark
239 '\u{201C}' => 0x93, // Left double quotation mark
240 '\u{201D}' => 0x94, // Right double quotation mark
241 '\u{2022}' => 0x95, // Bullet
242 '\u{2013}' => 0x96, // En dash
243 '\u{2014}' => 0x97, // Em dash
244 '\u{02DC}' => 0x98, // Small tilde
245 '\u{2122}' => 0x99, // Trade mark sign
246 '\u{0161}' => 0x9A, // Latin small letter s with caron
247 '\u{203A}' => 0x9B, // Single right-pointing angle quotation mark
248 '\u{0153}' => 0x9C, // Latin small ligature oe
249 '\u{017E}' => 0x9E, // Latin small letter z with caron
250 '\u{0178}' => 0x9F, // Latin capital letter Y with diaeresis
251 c if (c as u32) < 256 => c as i64,
252 c => c as i64, // For characters outside Windows-1252, return Unicode code point
253 };
254
255 Ok(CalcValue::Scalar(LiteralValue::Int(code)))
256 }
257}
258
/// Maps full-width ASCII variants in `text` to their half-width forms.
///
/// - U+3000 (ideographic space) becomes an ordinary space.
/// - U+FF01..=U+FF5E become the ASCII characters `!` through `~`.
/// - Every other character is copied through untouched.
fn asc_convert(text: &str) -> String {
    // Distance between a full-width form and its ASCII counterpart (0xFF01 - 0x21).
    const FULL_WIDTH_OFFSET: u32 = 0xFEE0;

    let mut narrowed = String::with_capacity(text.len());
    for ch in text.chars() {
        let replacement = match ch {
            '\u{3000}' => ' ',
            '\u{FF01}'..='\u{FF5E}' => {
                char::from_u32(ch as u32 - FULL_WIDTH_OFFSET).unwrap_or(ch)
            }
            other => other,
        };
        narrowed.push(replacement);
    }
    narrowed
}
273
274/// Converts full-width Latin and ASCII characters to half-width text.
275///
276/// Maps full-width ASCII punctuation, digits, letters, and ideographic space to
277/// their half-width equivalents while leaving other characters unchanged.
278///
279/// ```yaml,sandbox
280/// title: "Convert full-width letters and digits"
281/// formula: '=ASC("ABC123")'
282/// expected: "ABC123"
283/// ```
284///
285/// ```yaml,sandbox
286/// title: "Convert ideographic space"
287/// formula: '=ASC("A B")'
288/// expected: "A B"
289/// ```
290///
291/// ```yaml,docs
292/// related:
293/// - CHAR
294/// - CODE
295/// - UNICHAR
296/// faq:
297/// - q: "Are non-ASCII full-width characters transliterated?"
298/// a: "No. ASC only maps the full-width ASCII block and ideographic space."
299/// ```
300#[derive(Debug)]
301pub struct AscFn;
302/// [formualizer-docgen:schema:start]
303/// Name: ASC
304/// Type: AscFn
305/// Min args: 1
306/// Max args: 1
307/// Variadic: false
308/// Signature: ASC(arg1: any@scalar)
309/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
310/// Caps: PURE
311/// [formualizer-docgen:schema:end]
312impl Function for AscFn {
313 func_caps!(PURE);
314 fn name(&self) -> &'static str {
315 "ASC"
316 }
317 fn min_args(&self) -> usize {
318 1
319 }
320 fn arg_schema(&self) -> &'static [ArgSchema] {
321 &ARG_ANY_ONE[..]
322 }
323 fn eval<'a, 'b, 'c>(
324 &self,
325 args: &'c [ArgumentHandle<'a, 'b>],
326 _: &dyn FunctionContext<'b>,
327 ) -> Result<CalcValue<'b>, ExcelError> {
328 if args.len() != 1 {
329 return Ok(CalcValue::Scalar(LiteralValue::Error(
330 ExcelError::new_value(),
331 )));
332 }
333 let v = scalar_like_value(&args[0])?;
334 let s = match v {
335 LiteralValue::Text(t) => t,
336 LiteralValue::Empty => String::new(),
337 LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
338 other => other.to_string(),
339 };
340 Ok(CalcValue::Scalar(LiteralValue::Text(asc_convert(&s))))
341 }
342}
343
344/// REPT(text, number_times) - Repeats text a given number of times
345#[derive(Debug)]
346pub struct ReptFn;
347/// Repeats a text string a specified number of times.
348///
349/// # Remarks
350/// - Repeat count is truncated to an integer.
351/// - Negative counts return `#VALUE!`.
352/// - Output longer than 32,767 characters returns `#VALUE!`.
353/// - Non-text first argument is coerced to text.
354///
355/// # Examples
356///
357/// ```yaml,sandbox
358/// title: "Repeat text three times"
359/// formula: '=REPT("ab", 3)'
360/// expected: "ababab"
361/// ```
362///
363/// ```yaml,sandbox
364/// title: "Negative count"
365/// formula: '=REPT("x", -1)'
366/// expected: "#VALUE!"
367/// ```
368///
369/// ```yaml,docs
370/// related:
371/// - CONCAT
372/// - TEXTJOIN
373/// - SUBSTITUTE
374/// faq:
375/// - q: "Can REPT return very long strings?"
376/// a: "Only up to 32,767 characters; longer results return #VALUE! like Excel."
377/// ```
378/// [formualizer-docgen:schema:start]
379/// Name: REPT
380/// Type: ReptFn
381/// Min args: 2
382/// Max args: 2
383/// Variadic: false
384/// Signature: REPT(arg1: any@scalar, arg2: any@scalar)
385/// Arg schema: arg1{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}; arg2{kinds=any,required=true,shape=scalar,by_ref=false,coercion=None,max=None,repeating=None,default=false}
386/// Caps: PURE
387/// [formualizer-docgen:schema:end]
388impl Function for ReptFn {
389 func_caps!(PURE);
390 fn name(&self) -> &'static str {
391 "REPT"
392 }
393 fn min_args(&self) -> usize {
394 2
395 }
396 fn arg_schema(&self) -> &'static [ArgSchema] {
397 &ARG_ANY_TWO[..]
398 }
399 fn eval<'a, 'b, 'c>(
400 &self,
401 args: &'c [ArgumentHandle<'a, 'b>],
402 _: &dyn FunctionContext<'b>,
403 ) -> Result<CalcValue<'b>, ExcelError> {
404 let text_val = scalar_like_value(&args[0])?;
405 let count_val = scalar_like_value(&args[1])?;
406
407 let text = match text_val {
408 LiteralValue::Text(t) => t,
409 LiteralValue::Empty => String::new(),
410 LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
411 other => other.to_string(),
412 };
413
414 let count = match count_val {
415 LiteralValue::Error(e) => return Ok(CalcValue::Scalar(LiteralValue::Error(e))),
416 other => coerce_num(&other)?,
417 };
418
419 let count = count.trunc() as i64;
420
421 if count < 0 {
422 return Ok(CalcValue::Scalar(LiteralValue::Error(
423 ExcelError::new_value(),
424 )));
425 }
426
427 // Excel limits result to 32767 characters
428 let max_result_len = 32767;
429 let result_len = text.len() * (count as usize);
430 if result_len > max_result_len {
431 return Ok(CalcValue::Scalar(LiteralValue::Error(
432 ExcelError::new_value(),
433 )));
434 }
435
436 let result = text.repeat(count as usize);
437 Ok(CalcValue::Scalar(LiteralValue::Text(result)))
438 }
439}
440
441pub fn register_builtins() {
442 use std::sync::Arc;
443 crate::function_registry::register_function(Arc::new(CharFn));
444 crate::function_registry::register_function(Arc::new(CodeFn));
445 crate::function_registry::register_function(Arc::new(AscFn));
446 crate::function_registry::register_function(Arc::new(ReptFn));
447}
448
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_workbook::TestWorkbook;
    use crate::traits::ArgumentHandle;
    use formualizer_parse::parser::{ASTNode, ASTNodeType};

    /// Builds an interpreter over the given test workbook.
    fn interp(wb: &TestWorkbook) -> crate::interpreter::Interpreter<'_> {
        wb.interpreter()
    }
    /// Wraps a literal value in an AST literal node for use as a function argument.
    fn lit(v: LiteralValue) -> ASTNode {
        ASTNode::new(ASTNodeType::Literal(v), None)
    }

    #[test]
    fn char_basic() {
        let wb = TestWorkbook::new().with_function(std::sync::Arc::new(CharFn));
        let ctx = interp(&wb);
        let n = lit(LiteralValue::Number(65.0));
        let f = ctx.context.get_function("", "CHAR").unwrap();
        assert_eq!(
            f.dispatch(
                &[ArgumentHandle::new(&n, &ctx)],
                &ctx.function_context(None)
            )
            .unwrap()
            .into_literal(),
            LiteralValue::Text("A".to_string())
        );
    }

    #[test]
    fn code_basic() {
        let wb = TestWorkbook::new().with_function(std::sync::Arc::new(CodeFn));
        let ctx = interp(&wb);
        let s = lit(LiteralValue::Text("A".to_string()));
        let f = ctx.context.get_function("", "CODE").unwrap();
        assert_eq!(
            f.dispatch(
                &[ArgumentHandle::new(&s, &ctx)],
                &ctx.function_context(None)
            )
            .unwrap()
            .into_literal(),
            LiteralValue::Int(65)
        );
    }

    #[test]
    fn asc_converts_full_width_ascii_and_space() {
        let wb = TestWorkbook::new().with_function(std::sync::Arc::new(AscFn));
        let ctx = interp(&wb);
        // Full-width "ABC123!", an ideographic space, then full-width "x".
        // Written with escapes so the input cannot be silently normalized to ASCII.
        let s = lit(LiteralValue::Text(
            "\u{FF21}\u{FF22}\u{FF23}\u{FF11}\u{FF12}\u{FF13}\u{FF01}\u{3000}\u{FF58}".to_string(),
        ));
        let f = ctx.context.get_function("", "ASC").unwrap();
        assert_eq!(
            f.dispatch(
                &[ArgumentHandle::new(&s, &ctx)],
                &ctx.function_context(None)
            )
            .unwrap()
            .into_literal(),
            LiteralValue::Text("ABC123! x".to_string())
        );
    }

    #[test]
    fn rept_basic() {
        let wb = TestWorkbook::new().with_function(std::sync::Arc::new(ReptFn));
        let ctx = interp(&wb);
        let s = lit(LiteralValue::Text("ab".to_string()));
        let n = lit(LiteralValue::Number(3.0));
        let f = ctx.context.get_function("", "REPT").unwrap();
        assert_eq!(
            f.dispatch(
                &[ArgumentHandle::new(&s, &ctx), ArgumentHandle::new(&n, &ctx)],
                &ctx.function_context(None)
            )
            .unwrap()
            .into_literal(),
            LiteralValue::Text("ababab".to_string())
        );
    }
}