yozuk_helper_encoding/
lib.rs

1#![forbid(unsafe_code)]
2#![deny(clippy::all)]
3
4use yozuk_sdk::encoding::*;
5use yozuk_sdk::prelude::*;
6
7#[derive(Debug)]
8pub struct EncodingPreprocessor {
9    encodings: Vec<RawEncoding>,
10}
11
12impl EncodingPreprocessor {
13    pub fn new<I>(encodings: I) -> Self
14    where
15        I: IntoIterator<Item = RawEncoding>,
16    {
17        Self {
18            encodings: encodings.into_iter().collect(),
19        }
20    }
21}
22
23impl Preprocessor for EncodingPreprocessor {
24    fn preprocess(&self, input: Vec<Token>) -> Vec<Token> {
25        input
26            .into_iter()
27            .map(|t| {
28                let bytes = t.as_str().as_bytes();
29                let decoded = self.encodings.iter().find_map(|enc| match enc {
30                    RawEncoding::Base64 if is_like_base64(bytes) => {
31                        base64::decode_config(t.as_str(), base64::STANDARD)
32                            .ok()
33                            .map(|data| (enc, data))
34                    }
35                    RawEncoding::Base64Url if is_like_base64(bytes) => {
36                        base64::decode_config(t.as_str(), base64::URL_SAFE)
37                            .ok()
38                            .map(|data| (enc, data))
39                    }
40                    RawEncoding::Hex if is_like_hex(bytes) => {
41                        hex::decode(t.as_str()).ok().map(|data| (enc, data))
42                    }
43                    _ => None,
44                });
45                if let Some((enc, data)) = decoded {
46                    Token {
47                        data: data.into(),
48                        raw_encoding: Some(*enc),
49                        ..t
50                    }
51                } else {
52                    t
53                }
54            })
55            .collect()
56    }
57}
58
/// Heuristically decides whether `data` looks like Base64 text.
///
/// Every byte must belong to the standard or URL-safe Base64 alphabet
/// (`a-z`, `A-Z`, `0-9`, `+`, `/`, `-`, `_`, `=`); otherwise the result is
/// `false`. The bytes are then sorted into six character classes — `a-f`,
/// `A-F`, `g-z`, `G-Z`, digits, and the symbol set — and the function returns
/// `true` only when at least four distinct classes occur in the input.
pub fn is_like_base64(data: &[u8]) -> bool {
    // One bit per character class that has been observed so far.
    let mut classes: u8 = 0;
    for &byte in data {
        classes |= match byte {
            b'a'..=b'f' => 0b00_0001,
            b'A'..=b'F' => 0b00_0010,
            b'g'..=b'z' => 0b00_0100,
            b'G'..=b'Z' => 0b00_1000,
            b'0'..=b'9' => 0b01_0000,
            b'+' | b'/' | b'-' | b'_' | b'=' => 0b10_0000,
            // Anything outside the alphabet disqualifies the whole input.
            _ => return false,
        };
    }
    classes.count_ones() >= 4
}
84
/// Heuristically decides whether `data` looks like a hexadecimal string.
///
/// Returns `true` only when all of the following hold:
/// - the input is at least 16 bytes long,
/// - every byte is an ASCII hex digit (`0-9`, `a-f`, `A-F`),
/// - at least one decimal digit is present, and
/// - letters are present in exactly one case (lowercase or uppercase, not
///   both and not neither).
pub fn is_like_hex(data: &[u8]) -> bool {
    const MIN_LEN: usize = 16;

    if data.len() < MIN_LEN || !data.iter().all(|byte| byte.is_ascii_hexdigit()) {
        return false;
    }

    let has_lower = data.iter().any(|byte| (b'a'..=b'f').contains(byte));
    let has_upper = data.iter().any(|byte| (b'A'..=b'F').contains(byte));
    let has_digit = data.iter().any(|byte| byte.is_ascii_digit());

    // Exactly one letter case may appear, alongside at least one digit.
    (has_lower ^ has_upper) && has_digit
}