nu_command/strings/
char_.rs

1use indexmap::{IndexMap, indexmap};
2use nu_engine::command_prelude::*;
3
4use nu_protocol::Signals;
5use std::collections::HashSet;
6use std::sync::LazyLock;
7
/// Separator between the directories listed in a PATH-style environment
/// variable: `;` on the Windows target family, `:` on every other target.
const ENV_PATH_SEPARATOR_CHAR: char = if cfg!(target_family = "windows") {
    ';'
} else {
    ':'
};
14
/// Line terminator of the target platform: `"\r\n"` (CRLF) on the Windows
/// target family, `"\n"` (LF) on every other target.
///
/// The value is a `&str` on all targets (CRLF does not fit in a `char`), so
/// code using this constant type-checks identically on every platform.
const LINE_SEPARATOR_CHAR: &str = if cfg!(target_family = "windows") {
    "\r\n"
} else {
    "\n"
};
21
/// Unit struct for the `char` command; its behavior lives in the
/// `Command` implementation for this type further down in this file.
#[derive(Clone)]
pub struct Char;
24
/// Table of every named character known to `char`: key is the name accepted
/// on the command line, value is the string that name produces.
///
/// Several names are aliases for the same value. `char --list` walks this map
/// with `iter()`, so the entry order below is the order of the listing.
/// Built lazily on first access.
static CHAR_MAP: LazyLock<IndexMap<&'static str, String>> = LazyLock::new(|| {
    indexmap! {
        // These are some regular characters that either can't be used or
        // it's just easier to use them like this.

        "nul" => '\x00'.to_string(),                                // nul character, 0x00
        "null_byte" => '\x00'.to_string(),                          // nul character, 0x00
        "zero_byte" => '\x00'.to_string(),                          // nul character, 0x00
        // This is the "normal" characters section
        "newline" => '\n'.to_string(),
        "enter" => '\n'.to_string(),
        "nl" => '\n'.to_string(),
        "line_feed" => '\n'.to_string(),
        "lf" => '\n'.to_string(),
        "carriage_return" => '\r'.to_string(),
        "cr" => '\r'.to_string(),
        "crlf" => "\r\n".to_string(),
        "tab" => '\t'.to_string(),
        "sp" => ' '.to_string(),
        "space" => ' '.to_string(),
        "pipe" => '|'.to_string(),
        "left_brace" => '{'.to_string(),
        "lbrace" => '{'.to_string(),
        "right_brace" => '}'.to_string(),
        "rbrace" => '}'.to_string(),
        "left_paren" => '('.to_string(),
        "lp" => '('.to_string(),
        "lparen" => '('.to_string(),
        "right_paren" => ')'.to_string(),
        "rparen" => ')'.to_string(),
        "rp" => ')'.to_string(),
        "left_bracket" => '['.to_string(),
        "lbracket" => '['.to_string(),
        "right_bracket" => ']'.to_string(),
        "rbracket" => ']'.to_string(),
        "single_quote" => '\''.to_string(),
        "squote" => '\''.to_string(),
        "sq" => '\''.to_string(),
        "double_quote" => '\"'.to_string(),
        "dquote" => '\"'.to_string(),
        "dq" => '\"'.to_string(),
        // Platform-dependent values: path separator, line terminator and
        // PATH-variable separator of the compilation target.
        "path_sep" => std::path::MAIN_SEPARATOR.to_string(),
        "psep" => std::path::MAIN_SEPARATOR.to_string(),
        "separator" => std::path::MAIN_SEPARATOR.to_string(),
        "eol" => LINE_SEPARATOR_CHAR.to_string(),
        "lsep" => LINE_SEPARATOR_CHAR.to_string(),
        "line_sep" => LINE_SEPARATOR_CHAR.to_string(),
        "esep" => ENV_PATH_SEPARATOR_CHAR.to_string(),
        "env_sep" => ENV_PATH_SEPARATOR_CHAR.to_string(),
        "tilde" => '~'.to_string(),                                // ~
        "twiddle" => '~'.to_string(),                              // ~
        "squiggly" => '~'.to_string(),                             // ~
        "home" => '~'.to_string(),                                 // ~
        "hash" => '#'.to_string(),                                 // #
        "hashtag" => '#'.to_string(),                              // #
        "pound_sign" => '#'.to_string(),                           // #
        "sharp" => '#'.to_string(),                                // #
        "root" => '#'.to_string(),                                 // #

        // This is the unicode section
        // Unicode names came from https://www.compart.com/en/unicode
        // Private Use Area (U+E000-U+F8FF)
        // Unicode can't be mixed with Ansi or it will break width calculation
        "nf_branch" => '\u{e0a0}'.to_string(),                     // U+E0A0
        "nf_segment" => '\u{e0b0}'.to_string(),                    // U+E0B0
        "nf_left_segment" => '\u{e0b0}'.to_string(),               // U+E0B0
        "nf_left_segment_thin" => '\u{e0b1}'.to_string(),          // U+E0B1
        "nf_right_segment" => '\u{e0b2}'.to_string(),              // U+E0B2
        "nf_right_segment_thin" => '\u{e0b3}'.to_string(),         // U+E0B3
        "nf_git" => '\u{f1d3}'.to_string(),                        // U+F1D3
        "nf_git_branch" => "\u{e709}\u{e0a0}".to_string(),         // U+E709 U+E0A0
        "nf_folder1" => '\u{f07c}'.to_string(),                    // U+F07C
        "nf_folder2" => '\u{f115}'.to_string(),                    // U+F115
        "nf_house1" => '\u{f015}'.to_string(),                     // U+F015
        "nf_house2" => '\u{f7db}'.to_string(),                     // U+F7DB

        "identical_to" => '\u{2261}'.to_string(),                  // ≡
        "hamburger" => '\u{2261}'.to_string(),                     // ≡
        "not_identical_to" => '\u{2262}'.to_string(),              // ≢
        "branch_untracked" => '\u{2262}'.to_string(),              // ≢
        "strictly_equivalent_to" => '\u{2263}'.to_string(),        // ≣
        "branch_identical" => '\u{2263}'.to_string(),              // ≣

        "upwards_arrow" => '\u{2191}'.to_string(),                 // ↑
        "branch_ahead" => '\u{2191}'.to_string(),                  // ↑
        "downwards_arrow" => '\u{2193}'.to_string(),               // ↓
        "branch_behind" => '\u{2193}'.to_string(),                 // ↓
        "up_down_arrow" => '\u{2195}'.to_string(),                 // ↕
        "branch_ahead_behind" => '\u{2195}'.to_string(),           // ↕

        "black_right_pointing_triangle" => '\u{25b6}'.to_string(), // ▶
        "prompt" => '\u{25b6}'.to_string(),                        // ▶
        "vector_or_cross_product" => '\u{2a2f}'.to_string(),       // ⨯
        "failed" => '\u{2a2f}'.to_string(),                        // ⨯
        "high_voltage_sign" => '\u{26a1}'.to_string(),             // ⚡
        "elevated" => '\u{26a1}'.to_string(),                      // ⚡

        // This is the emoji section
        // Weather symbols
        // https://www.babelstone.co.uk/Unicode/whatisit.html
        "sun" => "☀️".to_string(),         // 2600 + fe0f
        "sunny" => "☀️".to_string(),       // 2600 + fe0f
        "sunrise" => "☀️".to_string(),     // 2600 + fe0f
        "moon" => "🌛".to_string(),        // 1f31b
        "cloudy" => "☁️".to_string(),      // 2601 + fe0f
        "cloud" => "☁️".to_string(),       // 2601 + fe0f
        "clouds" => "☁️".to_string(),      // 2601 + fe0f
        "rainy" => "🌦️".to_string(),       // 1f326 + fe0f
        "rain" => "🌦️".to_string(),        // 1f326 + fe0f
        "foggy" => "🌫️".to_string(),       // 1f32b + fe0f
        "fog" => "🌫️".to_string(),         // 1f32b + fe0f
        "mist" => '\u{2591}'.to_string(),  // 2591
        "haze" => '\u{2591}'.to_string(),  // 2591
        "snowy" => "❄️".to_string(),       // 2744 + fe0f
        "snow" => "❄️".to_string(),        // 2744 + fe0f
        "thunderstorm" => "🌩️".to_string(), // 1f329 + fe0f
        "thunder" => "🌩️".to_string(),     // 1f329 + fe0f

        // This is the "other" section
        "bel" => '\x07'.to_string(),       // Terminal Bell
        "backspace" => '\x08'.to_string(), // Backspace

        // separators (ASCII control codes 0x1c-0x1f)
        "file_separator" => '\x1c'.to_string(),
        "file_sep"  => '\x1c'.to_string(),
        "fs" => '\x1c'.to_string(),
        "group_separator" => '\x1d'.to_string(),
        "group_sep" => '\x1d'.to_string(),
        "gs" => '\x1d'.to_string(),
        "record_separator" => '\x1e'.to_string(),
        "record_sep" => '\x1e'.to_string(),
        "rs" => '\x1e'.to_string(),
        "unit_separator" => '\x1f'.to_string(),
        "unit_sep" => '\x1f'.to_string(),
        "us" => '\x1f'.to_string(),
    }
});
162
/// Names whose value is replaced by an empty string in the `character`
/// column of `char --list`.
///
/// Printing these characters raw (NUL, line breaks, carriage return, bell,
/// backspace) would break the format and alignment of the listing table.
/// Every alias of a suppressed value must be listed here, because the lookup
/// is done by name, not by value.
static NO_OUTPUT_CHARS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        // NUL
        "nul",
        "null_byte",
        "zero_byte",
        // line feed
        "newline",
        "enter",
        "nl",
        "line_feed",
        "lf",
        // carriage return (both aliases, so neither one leaks a raw '\r')
        "carriage_return",
        "cr",
        "crlf",
        // bell and backspace
        "bel",
        "backspace",
        // platform line terminator
        "lsep",
        "line_sep",
        "eol",
    ])
});
186
187impl Command for Char {
188    fn name(&self) -> &str {
189        "char"
190    }
191
192    fn signature(&self) -> Signature {
193        Signature::build("char")
194            .input_output_types(vec![(Type::Nothing, Type::Any)])
195            .optional(
196                "character",
197                SyntaxShape::Any,
198                "The name of the character to output.",
199            )
200            .rest("rest", SyntaxShape::Any, "Multiple Unicode bytes.")
201            .switch("list", "List all supported character names", Some('l'))
202            .switch("unicode", "Unicode string i.e. 1f378", Some('u'))
203            .switch("integer", "Create a codepoint from an integer", Some('i'))
204            .allow_variants_without_examples(true)
205            .category(Category::Strings)
206    }
207
208    fn is_const(&self) -> bool {
209        true
210    }
211
212    fn description(&self) -> &str {
213        "Output special characters (e.g., 'newline')."
214    }
215
216    fn search_terms(&self) -> Vec<&str> {
217        vec!["line break", "newline", "Unicode"]
218    }
219
220    fn examples(&self) -> Vec<Example<'_>> {
221        vec![
222            Example {
223                description: "Output newline",
224                example: r#"char newline"#,
225                result: Some(Value::test_string("\n")),
226            },
227            Example {
228                description: "List available characters",
229                example: r#"char --list"#,
230                result: None,
231            },
232            Example {
233                description: "Output prompt character, newline and a hamburger menu character",
234                example: r#"(char prompt) + (char newline) + (char hamburger)"#,
235                result: Some(Value::test_string("\u{25b6}\n\u{2261}")),
236            },
237            Example {
238                description: "Output Unicode character",
239                example: r#"char --unicode 1f378"#,
240                result: Some(Value::test_string("\u{1f378}")),
241            },
242            Example {
243                description: "Create Unicode from integer codepoint values",
244                example: r#"char --integer (0x60 + 1) (0x60 + 2)"#,
245                result: Some(Value::test_string("ab")),
246            },
247            Example {
248                description: "Output multi-byte Unicode character",
249                example: r#"char --unicode 1F468 200D 1F466 200D 1F466"#,
250                result: Some(Value::test_string(
251                    "\u{1F468}\u{200D}\u{1F466}\u{200D}\u{1F466}",
252                )),
253            },
254        ]
255    }
256
257    fn run_const(
258        &self,
259        working_set: &StateWorkingSet,
260        call: &Call,
261        _input: PipelineData,
262    ) -> Result<PipelineData, ShellError> {
263        let call_span = call.head;
264        let list = call.has_flag_const(working_set, "list")?;
265        let integer = call.has_flag_const(working_set, "integer")?;
266        let unicode = call.has_flag_const(working_set, "unicode")?;
267
268        // handle -l flag
269        if list {
270            return Ok(generate_character_list(
271                working_set.permanent().signals().clone(),
272                call.head,
273            ));
274        }
275
276        // handle -i flag
277        if integer {
278            let int_args = call.rest_const(working_set, 0)?;
279            handle_integer_flag(int_args, call_span)
280        }
281        // handle -u flag
282        else if unicode {
283            let string_args = call.rest_const(working_set, 0)?;
284            handle_unicode_flag(string_args, call_span)
285        }
286        // handle the rest
287        else {
288            let string_args = call.rest_const(working_set, 0)?;
289            handle_the_rest(string_args, call_span)
290        }
291    }
292
293    fn run(
294        &self,
295        engine_state: &EngineState,
296        stack: &mut Stack,
297        call: &Call,
298        _input: PipelineData,
299    ) -> Result<PipelineData, ShellError> {
300        let call_span = call.head;
301        let list = call.has_flag(engine_state, stack, "list")?;
302        let integer = call.has_flag(engine_state, stack, "integer")?;
303        let unicode = call.has_flag(engine_state, stack, "unicode")?;
304
305        // handle -l flag
306        if list {
307            return Ok(generate_character_list(
308                engine_state.signals().clone(),
309                call_span,
310            ));
311        }
312
313        // handle -i flag
314        if integer {
315            let int_args = call.rest(engine_state, stack, 0)?;
316            handle_integer_flag(int_args, call_span)
317        }
318        // handle -u flag
319        else if unicode {
320            let string_args = call.rest(engine_state, stack, 0)?;
321            handle_unicode_flag(string_args, call_span)
322        }
323        // handle the rest
324        else {
325            let string_args = call.rest(engine_state, stack, 0)?;
326            handle_the_rest(string_args, call_span)
327        }
328    }
329}
330
331fn generate_character_list(signals: Signals, call_span: Span) -> PipelineData {
332    CHAR_MAP
333        .iter()
334        .map(move |(name, s)| {
335            let character = if NO_OUTPUT_CHARS.contains(name) {
336                Value::string("", call_span)
337            } else {
338                Value::string(s, call_span)
339            };
340            let unicode = Value::string(
341                s.chars()
342                    .map(|c| format!("{:x}", c as u32))
343                    .collect::<Vec<String>>()
344                    .join(" "),
345                call_span,
346            );
347            let record = record! {
348                "name" => Value::string(*name, call_span),
349                "character" => character,
350                "unicode" => unicode,
351            };
352
353            Value::record(record, call_span)
354        })
355        .into_pipeline_data(call_span, signals)
356}
357
358fn handle_integer_flag(
359    int_args: Vec<Spanned<i64>>,
360    call_span: Span,
361) -> Result<PipelineData, ShellError> {
362    if int_args.is_empty() {
363        return Err(ShellError::MissingParameter {
364            param_name: "missing at least one unicode character".into(),
365            span: call_span,
366        });
367    }
368
369    let str = int_args
370        .into_iter()
371        .map(integer_to_unicode_char)
372        .collect::<Result<String, _>>()?;
373
374    Ok(Value::string(str, call_span).into_pipeline_data())
375}
376
377fn handle_unicode_flag(
378    string_args: Vec<Spanned<String>>,
379    call_span: Span,
380) -> Result<PipelineData, ShellError> {
381    if string_args.is_empty() {
382        return Err(ShellError::MissingParameter {
383            param_name: "missing at least one unicode character".into(),
384            span: call_span,
385        });
386    }
387
388    let str = string_args
389        .into_iter()
390        .map(string_to_unicode_char)
391        .collect::<Result<String, _>>()?;
392
393    Ok(Value::string(str, call_span).into_pipeline_data())
394}
395
396fn handle_the_rest(
397    string_args: Vec<Spanned<String>>,
398    call_span: Span,
399) -> Result<PipelineData, ShellError> {
400    let Some(s) = string_args.first() else {
401        return Err(ShellError::MissingParameter {
402            param_name: "missing name of the character".into(),
403            span: call_span,
404        });
405    };
406
407    let special_character = str_to_character(&s.item);
408
409    if let Some(output) = special_character {
410        Ok(Value::string(output, call_span).into_pipeline_data())
411    } else {
412        Err(ShellError::TypeMismatch {
413            err_message: "error finding named character".into(),
414            span: s.span,
415        })
416    }
417}
418
419fn integer_to_unicode_char(value: Spanned<i64>) -> Result<char, ShellError> {
420    let decoded_char = value.item.try_into().ok().and_then(std::char::from_u32);
421
422    if let Some(ch) = decoded_char {
423        Ok(ch)
424    } else {
425        Err(ShellError::TypeMismatch {
426            err_message: "not a valid Unicode codepoint".into(),
427            span: value.span,
428        })
429    }
430}
431
432fn string_to_unicode_char(s: Spanned<String>) -> Result<char, ShellError> {
433    let decoded_char = u32::from_str_radix(&s.item, 16)
434        .ok()
435        .and_then(std::char::from_u32);
436
437    if let Some(ch) = decoded_char {
438        Ok(ch)
439    } else {
440        Err(ShellError::TypeMismatch {
441            err_message: "error decoding Unicode character".into(),
442            span: s.span,
443        })
444    }
445}
446
447fn str_to_character(s: &str) -> Option<String> {
448    CHAR_MAP.get(s).map(|s| s.into())
449}
450
#[cfg(test)]
mod tests {
    use super::Char;
    use crate::test_examples;

    /// Runs every entry of `Char::examples` through the crate's example checker.
    #[test]
    fn examples_work_as_expected() {
        test_examples(Char);
    }
}
461}