sequoia_openpgp/armor/base64_utils.rs
1use std::{
2 borrow::Cow,
3};
4
5use base64::Engine;
6use base64::engine::general_purpose::STANDARD as base64std;
7
8use crate::{
9 packet::Header,
10};
11
12/// Remove whitespace, etc. from the base64 data.
13///
14/// This function returns the filtered base64 data (i.e., stripped of
15/// all skippable data like whitespace), and the amount of unfiltered
16/// data that corresponds to. Thus, if we have the following 7 bytes:
17///
18/// ```text
19/// ab cde
20/// 0123456
21/// ```
22///
23/// This function returns ("abcd", 6), because the 'd' is the last
24/// character in the last complete base64 chunk, and it is at offset 5.
25///
26/// If 'd' is followed by whitespace, it is undefined whether that
27/// whitespace is included in the count.
28///
29/// This function only returns full chunks of base64 data. As a
30/// consequence, if base64_data_max is less than 4, then this will not
31/// return any data.
32///
33/// This function will stop after it sees base64 padding, and if it
34/// sees invalid base64 data.
35pub fn base64_filter(mut bytes: Cow<[u8]>, base64_data_max: usize,
36 mut prefix_remaining: usize, prefix_len: usize)
37 -> (Cow<[u8]>, usize, usize)
38{
39 let mut leading_whitespace = 0;
40
41 // Round down to the nearest chunk size.
42 let base64_data_max = base64_data_max / 4 * 4;
43
44 // Number of bytes of base64 data. Since we update `bytes` in
45 // place, the base64 data is `&bytes[..base64_len]`.
46 let mut base64_len = 0;
47
48 // Offset of the next byte of unfiltered data to process.
49 let mut unfiltered_offset = 0;
50
51 // Offset of the last byte of the last ***complete*** base64 chunk
52 // in the unfiltered data.
53 let mut unfiltered_complete_len = 0;
54
55 // Number of bytes of padding that we've seen so far.
56 let mut padding = 0;
57
58 while unfiltered_offset < bytes.len()
59 && base64_len < base64_data_max
60 // A valid base64 chunk never starts with padding.
61 && ! (padding > 0 && base64_len % 4 == 0)
62 {
63 // If we have some prefix to skip, skip it.
64 if prefix_remaining > 0 {
65 prefix_remaining -= 1;
66 if unfiltered_offset == 0 {
67 match bytes {
68 Cow::Borrowed(s) => {
69 // We're at the beginning. Avoid moving
70 // data by cutting off the start of the
71 // slice.
72 bytes = Cow::Borrowed(&s[1..]);
73 leading_whitespace += 1;
74 continue;
75 }
76 Cow::Owned(_) => (),
77 }
78 }
79 unfiltered_offset += 1;
80 continue;
81 }
82 match bytes[unfiltered_offset] {
83 // White space.
84 c if c.is_ascii_whitespace() => {
85 if c == b'\n' {
86 prefix_remaining = prefix_len;
87 }
88 if unfiltered_offset == 0 {
89 match bytes {
90 Cow::Borrowed(s) => {
91 // We're at the beginning. Avoid moving
92 // data by cutting off the start of the
93 // slice.
94 bytes = Cow::Borrowed(&s[1..]);
95 leading_whitespace += 1;
96 continue;
97 }
98 Cow::Owned(_) => (),
99 }
100 }
101 }
102
103 // Padding.
104 b'=' => {
105 if padding == 2 {
106 // There can never be more than two bytes of
107 // padding.
108 break;
109 }
110 if base64_len % 4 == 0 {
111 // Padding can never occur at the start of a
112 // base64 chunk.
113 break;
114 }
115
116 if unfiltered_offset != base64_len {
117 bytes.to_mut()[base64_len] = b'=';
118 }
119 base64_len += 1;
120 if base64_len % 4 == 0 {
121 unfiltered_complete_len = unfiltered_offset + 1;
122 }
123 padding += 1;
124 }
125
126 // The only thing that can occur after padding is
127 // whitespace or padding. Those cases were covered above.
128 _ if padding > 0 => break,
129
130 // Base64 data!
131 b if is_base64_char(&b) => {
132 if unfiltered_offset != base64_len {
133 bytes.to_mut()[base64_len] = b;
134 }
135 base64_len += 1;
136 if base64_len % 4 == 0 {
137 unfiltered_complete_len = unfiltered_offset + 1;
138 }
139 }
140
141 // Not base64 data.
142 _ => break,
143 }
144
145 unfiltered_offset += 1;
146 }
147
148 let base64_len = base64_len - (base64_len % 4);
149 unfiltered_complete_len += leading_whitespace;
150 match bytes {
151 Cow::Borrowed(s) =>
152 (Cow::Borrowed(&s[..base64_len]), unfiltered_complete_len,
153 prefix_remaining),
154 Cow::Owned(mut v) => {
155 crate::vec_truncate(&mut v, base64_len);
156 (Cow::Owned(v), unfiltered_complete_len, prefix_remaining)
157 }
158 }
159}
160
161/// Checks whether the given bytes contain armored OpenPGP data.
162pub fn is_armored_pgp_blob(bytes: &[u8]) -> bool {
163 // Get up to 32 bytes of base64 data. That's 24 bytes of data
164 // (ignoring padding), which is more than enough to get the first
165 // packet's header.
166 let (bytes, _, _) = base64_filter(Cow::Borrowed(bytes), 32, 0, 0);
167
168 match base64std.decode(bytes) {
169 Ok(d) => {
170 // Don't consider an empty message to be valid.
171 if d.is_empty() {
172 false
173 } else {
174 let mut br = buffered_reader::Memory::new(&d);
175 if let Ok(header) = Header::parse(&mut br) {
176 header.ctb().tag().valid_start_of_message()
177 && header.valid(false).is_ok()
178 } else {
179 false
180 }
181 }
182 },
183 Err(_err) => false,
184 }
185}
186
/// Reports whether `b` belongs to the base64 alphabet, i.e. whether
/// it is an ASCII letter, an ASCII digit, `+`, or `/`.
///
/// The padding character `=` is not part of the alphabet.
pub fn is_base64_char(b: &u8) -> bool {
    matches!(*b,
             b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'+' | b'/')
}
191
/// Returns the number of bytes of base64 data that are needed to
/// encode `s` bytes of raw data.
///
/// Every group of up to three raw bytes is encoded as four bytes of
/// (padded) base64 data, so the result is `ceil(s / 3) * 4`.
///
/// If that value does not fit into a `usize`, the result saturates at
/// `usize::MAX` instead of overflowing.
pub fn base64_size(s: usize) -> usize {
    // Compute the ceiling without `s + 2`, which would overflow for
    // `s` close to `usize::MAX`.
    let chunks = s / 3 + usize::from(s % 3 != 0);
    chunks.saturating_mul(4)
}
197
#[test]
fn base64_size_test() {
    // (raw length, expected base64 length).  The encoded length grows
    // by four for every started group of three raw bytes.
    const CASES: [(usize, usize); 8] = [
        (0, 0),
        (1, 4),
        (2, 4),
        (3, 4),
        (4, 8),
        (5, 8),
        (6, 8),
        (7, 12),
    ];

    for &(raw, encoded) in CASES.iter() {
        assert_eq!(base64_size(raw), encoded);
    }
}