nu_command/strings/encode_decode/
decode.rs

1use nu_engine::command_prelude::*;
2use oem_cp::decode_string_complete_table;
3use std::collections::HashMap;
4use std::sync::LazyLock;
5
6// create a lazycell of all the code_table "Complete" code pages
7// the commented out code pages are "Incomplete", which means they
8// are stored as Option<char> and not &[char; 128]
9static OEM_DECODE: LazyLock<HashMap<usize, &[char; 128]>> = LazyLock::new(|| {
10    let mut m = HashMap::new();
11    m.insert(437, &oem_cp::code_table::DECODING_TABLE_CP437);
12    // m.insert(720, &oem_cp::code_table::DECODING_TABLE_CP720);
13    m.insert(737, &oem_cp::code_table::DECODING_TABLE_CP737);
14    m.insert(775, &oem_cp::code_table::DECODING_TABLE_CP775);
15
16    m.insert(850, &oem_cp::code_table::DECODING_TABLE_CP850);
17    m.insert(852, &oem_cp::code_table::DECODING_TABLE_CP852);
18    m.insert(855, &oem_cp::code_table::DECODING_TABLE_CP855);
19    // m.insert(857, &oem_cp::code_table::DECODING_TABLE_CP857);
20    m.insert(858, &oem_cp::code_table::DECODING_TABLE_CP858);
21    m.insert(860, &oem_cp::code_table::DECODING_TABLE_CP860);
22    m.insert(861, &oem_cp::code_table::DECODING_TABLE_CP861);
23    m.insert(862, &oem_cp::code_table::DECODING_TABLE_CP862);
24    m.insert(863, &oem_cp::code_table::DECODING_TABLE_CP863);
25    // m.insert(864, &oem_cp::code_table::DECODING_TABLE_CP864);
26    m.insert(865, &oem_cp::code_table::DECODING_TABLE_CP865);
27    m.insert(866, &oem_cp::code_table::DECODING_TABLE_CP866);
28    // m.insert(869, &oem_cp::code_table::DECODING_TABLE_CP869);
29    // m.insert(874, &oem_cp::code_table::DECODING_TABLE_CP874);
30
31    m
32});
33
34#[derive(Clone)]
35pub struct Decode;
36
37impl Command for Decode {
38    fn name(&self) -> &str {
39        "decode"
40    }
41
42    fn description(&self) -> &str {
43        "Decode bytes into a string."
44    }
45
46    fn search_terms(&self) -> Vec<&str> {
47        vec!["text", "encoding", "decoding"]
48    }
49
50    fn signature(&self) -> nu_protocol::Signature {
51        Signature::build("decode")
52            .input_output_types(vec![(Type::Binary, Type::String)])
53            .optional("encoding", SyntaxShape::String, "The text encoding to use.")
54            .category(Category::Strings)
55    }
56
57    fn extra_description(&self) -> &str {
58        r#"Multiple encodings are supported; here are a few:
59big5, euc-jp, euc-kr, gbk, iso-8859-1, utf-16, cp1252, latin5
60
61For a more complete list of encodings please refer to the encoding_rs
62documentation link at https://docs.rs/encoding_rs/latest/encoding_rs/#statics"#
63    }
64
65    fn examples(&self) -> Vec<Example> {
66        vec![
67            Example {
68                description: "Decode the output of an external command",
69                example: "^cat myfile.q | decode utf-8",
70                result: None,
71            },
72            Example {
73                description: "Decode an UTF-16 string into nushell UTF-8 string",
74                example: r#"0x[00 53 00 6F 00 6D 00 65 00 20 00 44 00 61 00 74 00 61] | decode utf-16be"#,
75                result: Some(Value::string("Some Data".to_owned(), Span::test_data())),
76            },
77        ]
78    }
79
80    fn is_const(&self) -> bool {
81        true
82    }
83
84    fn run(
85        &self,
86        engine_state: &EngineState,
87        stack: &mut Stack,
88        call: &Call,
89        input: PipelineData,
90    ) -> Result<PipelineData, ShellError> {
91        let encoding: Option<Spanned<String>> = call.opt(engine_state, stack, 0)?;
92        run(call, input, encoding)
93    }
94
95    fn run_const(
96        &self,
97        working_set: &StateWorkingSet,
98        call: &Call,
99        input: PipelineData,
100    ) -> Result<PipelineData, ShellError> {
101        let encoding: Option<Spanned<String>> = call.opt_const(working_set, 0)?;
102        run(call, input, encoding)
103    }
104}
105
106fn run(
107    call: &Call,
108    input: PipelineData,
109    encoding: Option<Spanned<String>>,
110) -> Result<PipelineData, ShellError> {
111    let head = call.head;
112
113    match input {
114        PipelineData::ByteStream(stream, ..) => {
115            let span = stream.span();
116            let bytes = stream.into_bytes()?;
117            match encoding {
118                Some(encoding_name) => detect_and_decode(encoding_name, head, bytes),
119                None => super::encoding::detect_encoding_name(head, span, &bytes)
120                    .map(|encoding| encoding.decode(&bytes).0.into_owned())
121                    .map(|s| Value::string(s, head)),
122            }
123            .map(|val| val.into_pipeline_data())
124        }
125        PipelineData::Value(v, ..) => {
126            let input_span = v.span();
127            match v {
128                Value::Binary { val: bytes, .. } => match encoding {
129                    Some(encoding_name) => detect_and_decode(encoding_name, head, bytes),
130                    None => super::encoding::detect_encoding_name(head, input_span, &bytes)
131                        .map(|encoding| encoding.decode(&bytes).0.into_owned())
132                        .map(|s| Value::string(s, head)),
133                }
134                .map(|val| val.into_pipeline_data()),
135                Value::Error { error, .. } => Err(*error),
136                _ => Err(ShellError::OnlySupportsThisInputType {
137                    exp_input_type: "binary".into(),
138                    wrong_type: v.get_type().to_string(),
139                    dst_span: head,
140                    src_span: v.span(),
141                }),
142            }
143        }
144        // This should be more precise, but due to difficulties in getting spans
145        // from PipelineData::ListData, this is as it is.
146        _ => Err(ShellError::UnsupportedInput {
147            msg: "non-binary input".into(),
148            input: "value originates from here".into(),
149            msg_span: head,
150            input_span: input.span().unwrap_or(head),
151        }),
152    }
153}
154
155// Since we have two different decoding mechanisms, we allow oem_cp to be
156// specified by only a number like `open file | decode 850`. If this decode
157// parameter parses as a usize then we assume it was intentional and use oem_cp
158// crate. Otherwise, if it doesn't parse as a usize, we assume it was a string
159// and use the encoding_rs crate to try and decode it.
160fn detect_and_decode(
161    encoding_name: Spanned<String>,
162    head: Span,
163    bytes: Vec<u8>,
164) -> Result<Value, ShellError> {
165    let dec_table_id = encoding_name.item.parse::<usize>().unwrap_or(0usize);
166    if dec_table_id == 0 {
167        super::encoding::decode(head, encoding_name, &bytes)
168    } else {
169        Ok(Value::string(
170            decode_string_complete_table(&bytes, OEM_DECODE[&dec_table_id]),
171            head,
172        ))
173    }
174}
175
/// Unit tests for the `decode` command.
#[cfg(test)]
mod test {
    use super::Decode;

    /// Hands the command to the crate-level `test_examples` helper.
    #[test]
    fn test_examples() {
        crate::test_examples(Decode);
    }
}