Skip to main content

nu_command/strings/
char_.rs

1use indexmap::{IndexMap, indexmap};
2use nu_engine::command_prelude::*;
3use nu_protocol::{Parameter, Signals};
4use nu_utils::consts::{ENV_PATH_SEPARATOR_CHAR, LINE_SEPARATOR_STR};
5use std::collections::HashSet;
6use std::sync::LazyLock;
7
/// Unit struct that carries the `Command` implementation for the `char`
/// command, which outputs special characters by name, by hexadecimal Unicode
/// codepoint (`--unicode`), or by integer codepoint (`--integer`).
8#[derive(Clone)]
9pub struct Char;
10
/// Lookup table from every character name accepted by `char` to the string it
/// produces.
///
/// Several names are aliases for the same string (e.g. `newline`, `nl`, `lf`).
/// Most values are a single character; a few (`crlf`, `nf_git_branch`, and the
/// emoji carrying a U+FE0F variation selector) contain more than one codepoint.
/// The table is an `IndexMap`, so iteration (as used by `char --list` and the
/// name completions) follows the order in which the entries are written here.
11static CHAR_MAP: LazyLock<IndexMap<&'static str, String>> = LazyLock::new(|| {
12    indexmap! {
13        // These are some regular characters that either can't be used or
14        // it's just easier to use them like this.
15
16        "nul" => '\x00'.to_string(),                                // nul character, 0x00
17        "null_byte" => '\x00'.to_string(),                          // nul character, 0x00
18        "zero_byte" => '\x00'.to_string(),                          // nul character, 0x00
19        // This is the "normal" characters section
20        "newline" => '\n'.to_string(),
21        "enter" => '\n'.to_string(),
22        "nl" => '\n'.to_string(),
23        "line_feed" => '\n'.to_string(),
24        "lf" => '\n'.to_string(),
25        "carriage_return" => '\r'.to_string(),
26        "cr" => '\r'.to_string(),
27        "crlf" => "\r\n".to_string(),
28        "tab" => '\t'.to_string(),
29        "sp" => ' '.to_string(),
30        "space" => ' '.to_string(),
31        "pipe" => '|'.to_string(),
32        "left_brace" => '{'.to_string(),
33        "lbrace" => '{'.to_string(),
34        "right_brace" => '}'.to_string(),
35        "rbrace" => '}'.to_string(),
36        "left_paren" => '('.to_string(),
37        "lp" => '('.to_string(),
38        "lparen" => '('.to_string(),
39        "right_paren" => ')'.to_string(),
40        "rparen" => ')'.to_string(),
41        "rp" => ')'.to_string(),
42        "left_bracket" => '['.to_string(),
43        "lbracket" => '['.to_string(),
44        "right_bracket" => ']'.to_string(),
45        "rbracket" => ']'.to_string(),
46        "single_quote" => '\''.to_string(),
47        "squote" => '\''.to_string(),
48        "sq" => '\''.to_string(),
49        "double_quote" => '\"'.to_string(),
50        "dquote" => '\"'.to_string(),
51        "dq" => '\"'.to_string(),
52        "path_sep" => std::path::MAIN_SEPARATOR.to_string(),
53        "psep" => std::path::MAIN_SEPARATOR.to_string(),
54        "separator" => std::path::MAIN_SEPARATOR.to_string(),
55        "eol" => LINE_SEPARATOR_STR.to_string(),
56        "lsep" => LINE_SEPARATOR_STR.to_string(),
57        "line_sep" => LINE_SEPARATOR_STR.to_string(),
58        "esep" => ENV_PATH_SEPARATOR_CHAR.to_string(),
59        "env_sep" => ENV_PATH_SEPARATOR_CHAR.to_string(),
60        "tilde" => '~'.to_string(),                                // ~
61        "twiddle" => '~'.to_string(),                              // ~
62        "squiggly" => '~'.to_string(),                             // ~
63        "home" => '~'.to_string(),                                 // ~
64        "hash" => '#'.to_string(),                                 // #
65        "hashtag" => '#'.to_string(),                              // #
66        "pound_sign" => '#'.to_string(),                           // #
67        "sharp" => '#'.to_string(),                                // #
68        "root" => '#'.to_string(),                                 // #
69
70        // This is the unicode section
71        // Unicode names came from https://www.compart.com/en/unicode
72        // Private Use Area (U+E000-U+F8FF)
73        // Unicode can't be mixed with Ansi or it will break width calculation
74        "nf_branch" => '\u{e0a0}'.to_string(),                     // 
75        "nf_segment" => '\u{e0b0}'.to_string(),                    // 
76        "nf_left_segment" => '\u{e0b0}'.to_string(),               // 
77        "nf_left_segment_thin" => '\u{e0b1}'.to_string(),          // 
78        "nf_right_segment" => '\u{e0b2}'.to_string(),              // 
79        "nf_right_segment_thin" => '\u{e0b3}'.to_string(),         // 
80        "nf_git" => '\u{f1d3}'.to_string(),                        // 
81        "nf_git_branch" => "\u{e709}\u{e0a0}".to_string(),         // 
82        "nf_folder1" => '\u{f07c}'.to_string(),                    // 
83        "nf_folder2" => '\u{f115}'.to_string(),                    // 
84        "nf_house1" => '\u{f015}'.to_string(),                     // 
85        "nf_house2" => '\u{f7db}'.to_string(),                     // 
86
87        "identical_to" => '\u{2261}'.to_string(),                  // ≡
88        "hamburger" => '\u{2261}'.to_string(),                     // ≡
89        "not_identical_to" => '\u{2262}'.to_string(),              // ≢
90        "branch_untracked" => '\u{2262}'.to_string(),              // ≢
91        "strictly_equivalent_to" => '\u{2263}'.to_string(),        // ≣
92        "branch_identical" => '\u{2263}'.to_string(),              // ≣
93
94        "upwards_arrow" => '\u{2191}'.to_string(),                 // ↑
95        "branch_ahead" => '\u{2191}'.to_string(),                  // ↑
96        "downwards_arrow" => '\u{2193}'.to_string(),               // ↓
97        "branch_behind" => '\u{2193}'.to_string(),                 // ↓
98        "up_down_arrow" => '\u{2195}'.to_string(),                 // ↕
99        "branch_ahead_behind" => '\u{2195}'.to_string(),           // ↕
100
101        "black_right_pointing_triangle" => '\u{25b6}'.to_string(), // ▶
102        "prompt" => '\u{25b6}'.to_string(),                        // ▶
103        "vector_or_cross_product" => '\u{2a2f}'.to_string(),       // ⨯
104        "failed" => '\u{2a2f}'.to_string(),                        // ⨯
105        "high_voltage_sign" => '\u{26a1}'.to_string(),             // ⚡
106        "elevated" => '\u{26a1}'.to_string(),                      // ⚡
107
108        // This is the emoji section
109        // Weather symbols
110        // https://www.babelstone.co.uk/Unicode/whatisit.html
111        "sun" => "☀️".to_string(),         //2600 + fe0f
112        "sunny" => "☀️".to_string(),       //2600 + fe0f
113        "sunrise" => "☀️".to_string(),     //2600 + fe0f
114        "moon" => "🌛".to_string(),        //1f31b
115        "cloudy" => "☁️".to_string(),      //2601 + fe0f
116        "cloud" => "☁️".to_string(),       //2601 + fe0f
117        "clouds" => "☁️".to_string(),      //2601 + fe0f
118        "rainy" => "🌦️".to_string(),       //1f326 + fe0f
119        "rain" => "🌦️".to_string(),        //1f326 + fe0f
120        "foggy" => "🌫️".to_string(),       //1f32b + fe0f
121        "fog" => "🌫️".to_string(),         //1f32b + fe0f
122        "mist" => '\u{2591}'.to_string(),  //2591
123        "haze" => '\u{2591}'.to_string(),  //2591
124        "snowy" => "❄️".to_string(),       //2744 + fe0f
125        "snow" => "❄️".to_string(),        //2744 + fe0f
126        "thunderstorm" => "🌩️".to_string(),//1f329 + fe0f
127        "thunder" => "🌩️".to_string(),     //1f329 + fe0f
128
129        // This is the "other" section
130        "bel" => '\x07'.to_string(),       // Terminal Bell
131        "backspace" => '\x08'.to_string(), // Backspace
132
133        // separators
134        "file_separator" => '\x1c'.to_string(),
135        "file_sep"  => '\x1c'.to_string(),
136        "fs" => '\x1c'.to_string(),
137        "group_separator" => '\x1d'.to_string(),
138        "group_sep" => '\x1d'.to_string(),
139        "gs" => '\x1d'.to_string(),
140        "record_separator" => '\x1e'.to_string(),
141        "record_sep" => '\x1e'.to_string(),
142        "rs" => '\x1e'.to_string(),
143        "unit_separator" => '\x1f'.to_string(),
144        "unit_sep" => '\x1f'.to_string(),
145        "us" => '\x1f'.to_string(),
146    }
147});
148
149static CHAR_NAMES: LazyLock<Vec<&'static str>> =
150    LazyLock::new(|| CHAR_MAP.keys().copied().collect());
151
/// Names whose character is replaced by an empty string in the `character`
/// column of `char --list`.
///
/// Emitting these characters verbatim (NUL, line breaks, carriage return,
/// bell, backspace) would break the format and alignment of the rendered
/// table. The `unicode` column still shows their codepoints.
///
/// Every alias of a suppressed character must be listed, because lookups are
/// done by name: `carriage_return` is included alongside `cr` since both map
/// to `'\r'`.
static NO_OUTPUT_CHARS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        // NUL
        "nul",
        "null_byte",
        "zero_byte",
        // Line feed
        "newline",
        "enter",
        "nl",
        "line_feed",
        "lf",
        // Carriage return (both aliases) and CRLF
        "carriage_return",
        "cr",
        "crlf",
        // Other control characters
        "bel",
        "backspace",
        // Platform line separator
        "lsep",
        "line_sep",
        "eol",
    ])
});
175
176impl Command for Char {
177    fn name(&self) -> &str {
178        "char"
179    }
180
181    fn signature(&self) -> Signature {
182        Signature::build("char")
183            .input_output_types(vec![(Type::Nothing, Type::Any)])
184            .param(Parameter::Optional(
185                PositionalArg::new("character", SyntaxShape::Any)
186                    .desc("The name of the character to output.")
187                    .completion(Completion::new_list(&CHAR_NAMES)),
188            ))
189            .rest("rest", SyntaxShape::Any, "Multiple Unicode bytes.")
190            .switch("list", "List all supported character names.", Some('l'))
191            .switch("unicode", "Unicode string i.e. 1f378.", Some('u'))
192            .switch("integer", "Create a codepoint from an integer.", Some('i'))
193            .allow_variants_without_examples(true)
194            .category(Category::Strings)
195    }
196
197    fn is_const(&self) -> bool {
198        true
199    }
200
201    fn description(&self) -> &str {
202        "Output special characters (e.g., 'newline')."
203    }
204
205    fn search_terms(&self) -> Vec<&str> {
206        vec!["line break", "newline", "Unicode"]
207    }
208
209    fn examples(&self) -> Vec<Example<'_>> {
210        vec![
211            Example {
212                description: "Output newline",
213                example: "char newline",
214                result: Some(Value::test_string("\n")),
215            },
216            Example {
217                description: "List available characters",
218                example: "char --list",
219                result: None,
220            },
221            Example {
222                description: "Output prompt character, newline and a hamburger menu character",
223                example: "(char prompt) + (char newline) + (char hamburger)",
224                result: Some(Value::test_string("\u{25b6}\n\u{2261}")),
225            },
226            Example {
227                description: "Output Unicode character",
228                example: "char --unicode 1f378",
229                result: Some(Value::test_string("\u{1f378}")),
230            },
231            Example {
232                description: "Create Unicode from integer codepoint values",
233                example: "char --integer (0x60 + 1) (0x60 + 2)",
234                result: Some(Value::test_string("ab")),
235            },
236            Example {
237                description: "Output multi-byte Unicode character",
238                example: "char --unicode 1F468 200D 1F466 200D 1F466",
239                result: Some(Value::test_string(
240                    "\u{1F468}\u{200D}\u{1F466}\u{200D}\u{1F466}",
241                )),
242            },
243        ]
244    }
245
246    fn run_const(
247        &self,
248        working_set: &StateWorkingSet,
249        call: &Call,
250        _input: PipelineData,
251    ) -> Result<PipelineData, ShellError> {
252        let call_span = call.head;
253        let list = call.has_flag_const(working_set, "list")?;
254        let integer = call.has_flag_const(working_set, "integer")?;
255        let unicode = call.has_flag_const(working_set, "unicode")?;
256
257        // handle -l flag
258        if list {
259            return Ok(generate_character_list(
260                working_set.permanent().signals().clone(),
261                call.head,
262            ));
263        }
264
265        // handle -i flag
266        if integer {
267            let int_args = call.rest_const(working_set, 0)?;
268            handle_integer_flag(int_args, call_span)
269        }
270        // handle -u flag
271        else if unicode {
272            let string_args = call.rest_const(working_set, 0)?;
273            handle_unicode_flag(string_args, call_span)
274        }
275        // handle the rest
276        else {
277            let string_args = call.rest_const(working_set, 0)?;
278            handle_the_rest(string_args, call_span)
279        }
280    }
281
282    fn run(
283        &self,
284        engine_state: &EngineState,
285        stack: &mut Stack,
286        call: &Call,
287        _input: PipelineData,
288    ) -> Result<PipelineData, ShellError> {
289        let call_span = call.head;
290        let list = call.has_flag(engine_state, stack, "list")?;
291        let integer = call.has_flag(engine_state, stack, "integer")?;
292        let unicode = call.has_flag(engine_state, stack, "unicode")?;
293
294        // handle -l flag
295        if list {
296            return Ok(generate_character_list(
297                engine_state.signals().clone(),
298                call_span,
299            ));
300        }
301
302        // handle -i flag
303        if integer {
304            let int_args = call.rest(engine_state, stack, 0)?;
305            handle_integer_flag(int_args, call_span)
306        }
307        // handle -u flag
308        else if unicode {
309            let string_args = call.rest(engine_state, stack, 0)?;
310            handle_unicode_flag(string_args, call_span)
311        }
312        // handle the rest
313        else {
314            let string_args = call.rest(engine_state, stack, 0)?;
315            handle_the_rest(string_args, call_span)
316        }
317    }
318}
319
320fn generate_character_list(signals: Signals, call_span: Span) -> PipelineData {
321    CHAR_MAP
322        .iter()
323        .map(move |(name, s)| {
324            let character = if NO_OUTPUT_CHARS.contains(name) {
325                Value::string("", call_span)
326            } else {
327                Value::string(s, call_span)
328            };
329            let unicode = Value::string(
330                s.chars()
331                    .map(|c| format!("{:x}", c as u32))
332                    .collect::<Vec<String>>()
333                    .join(" "),
334                call_span,
335            );
336            let record = record! {
337                "name" => Value::string(*name, call_span),
338                "character" => character,
339                "unicode" => unicode,
340            };
341
342            Value::record(record, call_span)
343        })
344        .into_pipeline_data(call_span, signals)
345}
346
347fn handle_integer_flag(
348    int_args: Vec<Spanned<i64>>,
349    call_span: Span,
350) -> Result<PipelineData, ShellError> {
351    if int_args.is_empty() {
352        return Err(ShellError::MissingParameter {
353            param_name: "missing at least one unicode character".into(),
354            span: call_span,
355        });
356    }
357
358    let str = int_args
359        .into_iter()
360        .map(integer_to_unicode_char)
361        .collect::<Result<String, _>>()?;
362
363    Ok(Value::string(str, call_span).into_pipeline_data())
364}
365
366fn handle_unicode_flag(
367    string_args: Vec<Spanned<String>>,
368    call_span: Span,
369) -> Result<PipelineData, ShellError> {
370    if string_args.is_empty() {
371        return Err(ShellError::MissingParameter {
372            param_name: "missing at least one unicode character".into(),
373            span: call_span,
374        });
375    }
376
377    let str = string_args
378        .into_iter()
379        .map(string_to_unicode_char)
380        .collect::<Result<String, _>>()?;
381
382    Ok(Value::string(str, call_span).into_pipeline_data())
383}
384
385fn handle_the_rest(
386    string_args: Vec<Spanned<String>>,
387    call_span: Span,
388) -> Result<PipelineData, ShellError> {
389    let Some(s) = string_args.first() else {
390        return Err(ShellError::MissingParameter {
391            param_name: "missing name of the character".into(),
392            span: call_span,
393        });
394    };
395
396    let special_character = str_to_character(&s.item);
397
398    if let Some(output) = special_character {
399        Ok(Value::string(output, call_span).into_pipeline_data())
400    } else {
401        Err(ShellError::TypeMismatch {
402            err_message: "error finding named character".into(),
403            span: s.span,
404        })
405    }
406}
407
408fn integer_to_unicode_char(value: Spanned<i64>) -> Result<char, ShellError> {
409    let decoded_char = value.item.try_into().ok().and_then(std::char::from_u32);
410
411    if let Some(ch) = decoded_char {
412        Ok(ch)
413    } else {
414        Err(ShellError::TypeMismatch {
415            err_message: "not a valid Unicode codepoint".into(),
416            span: value.span,
417        })
418    }
419}
420
421fn string_to_unicode_char(s: Spanned<String>) -> Result<char, ShellError> {
422    let decoded_char = u32::from_str_radix(&s.item, 16)
423        .ok()
424        .and_then(std::char::from_u32);
425
426    if let Some(ch) = decoded_char {
427        Ok(ch)
428    } else {
429        Err(ShellError::TypeMismatch {
430            err_message: "error decoding Unicode character".into(),
431            span: s.span,
432        })
433    }
434}
435
436fn str_to_character(s: &str) -> Option<String> {
437    CHAR_MAP.get(s).map(|s| s.into())
438}
439
// Unit tests for the `char` command.
440#[cfg(test)]
441mod tests {
442    use super::Char;
443
    // Hands the command to the `nu_test_support` example runner.
    // NOTE(review): presumably this evaluates each entry of `Char::examples()`
    // and compares it with its declared `result` — confirm against
    // `nu_test_support`.
444    #[test]
445    fn examples_work_as_expected() -> nu_test_support::Result {
446        nu_test_support::test().examples(Char)
447    }
448}
448}