encoding_mel/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![doc = include_str!("../README.md")]
3
4use melodium_core::*;
5use melodium_macro::{check, mel_package, mel_treatment};
6
7/// Decodes stream of bytes into string.
8///
9/// The incoming stream of bytes is decoded using the specified encoding.
10/// If some characters cannot be decoded for some reason (i.e. invalid according to encoding),
11/// it is replaced by the `U+FFFD REPLACEMENT CHARACTER` (�).
12///
13/// The supported encodings and possible values for `encoding` are defined in
14/// the [Encoding Standard](https://encoding.spec.whatwg.org/#names-and-labels).
15/// If `encoding` is not recognized, UTF-8 is assumed instead.
16#[mel_treatment(
17    default encoding "utf-8"
18    input data Stream<byte>
19    output text Stream<string>
20)]
21pub async fn decode(encoding: string) {
22    let encoding =
23        encoding_rs::Encoding::for_label(encoding.as_bytes()).unwrap_or(encoding_rs::UTF_8);
24    let mut decoder = encoding.new_decoder();
25
26    let mut finished = false;
27    while !finished {
28        let bytes;
29        if let Ok(data) = data
30            .recv_many()
31            .await
32            .map(|values| TryInto::<Vec<u8>>::try_into(values).unwrap())
33        {
34            bytes = data;
35        } else {
36            bytes = vec![];
37            finished = true;
38        }
39
40        let mut result = String::with_capacity(bytes.len() * 2);
41
42        let _ = decoder.decode_to_string(&bytes, &mut result, finished);
43
44        result.shrink_to_fit();
45
46        check!(text.send_one(result.into()).await);
47    }
48}
49
50/// Encodes streamed text with specified encoding.
51///
52/// The incoming string is encoded and outputted into raw bytes. If some characters cannot
53/// be encoded into the specified `encoding`, the behavior is set by `replace`:
54/// - `false`: the character is dropped;
55/// - `true`: the character is replaced with coded XML character (such as `&#65533;`).
56///
57/// The supported encodings and possible values for `encoding` are defined in
58/// the [Encoding Standard](https://encoding.spec.whatwg.org/#names-and-labels).
59/// If `encoding` is not recognized, UTF-8 is assumed instead.
60#[mel_treatment(
61    default encoding "utf-8"
62    default replace false
63    input text Stream<string>
64    output data Stream<byte>
65)]
66pub async fn encode(encoding: string, replace: bool) {
67    let encoding =
68        encoding_rs::Encoding::for_label(encoding.as_bytes()).unwrap_or(encoding_rs::UTF_8);
69    let mut encoder = encoding.new_encoder();
70
71    'main: while let Ok(text) = text
72        .recv_many()
73        .await
74        .map(|values| TryInto::<Vec<string>>::try_into(values).unwrap())
75    {
76        for text in text {
77            let expected_size = if replace {
78                7 * encoder
79                    .max_buffer_length_from_utf8_if_no_unmappables(text.len())
80                    .unwrap_or(2_usize.pow(20))
81            } else {
82                encoder
83                    .max_buffer_length_from_utf8_without_replacement(text.len())
84                    .unwrap_or(2_usize.pow(20))
85            };
86
87            let mut result = Vec::new();
88            result.reserve(expected_size);
89
90            if replace {
91                let _ = encoder.encode_from_utf8_to_vec(&text, &mut result, false);
92            } else {
93                let _ =
94                    encoder.encode_from_utf8_to_vec_without_replacement(&text, &mut result, false);
95            }
96
97            result.shrink_to_fit();
98            check!('main, data.send_many(TransmissionValue::Byte(result.into())).await);
99        }
100    }
101
102    let expected_size = if replace {
103        7 * encoder
104            .max_buffer_length_from_utf8_if_no_unmappables(0)
105            .unwrap_or(2_usize.pow(6))
106    } else {
107        encoder
108            .max_buffer_length_from_utf8_without_replacement(0)
109            .unwrap_or(2_usize.pow(6))
110    };
111
112    let mut result = Vec::new();
113    result.reserve(expected_size);
114
115    if replace {
116        let _ = encoder.encode_from_utf8_to_vec(&String::new(), &mut result, false);
117    } else {
118        let _ =
119            encoder.encode_from_utf8_to_vec_without_replacement(&String::new(), &mut result, false);
120    }
121
122    result.shrink_to_fit();
123    let _ = data.send_many(TransmissionValue::Byte(result.into())).await;
124}
125
// Invokes the `mel_package` macro imported from `melodium_macro` above.
// NOTE(review): presumably this generates the package-level registration for the
// treatments declared in this crate — confirm against the `melodium_macro` documentation.
126mel_package!();