age_core/format.rs
1//! Core types and encoding operations used by the age file format.
2
3use base64::{prelude::BASE64_STANDARD_NO_PAD, Engine};
4use rand::{
5 distributions::{Distribution, Uniform},
6 thread_rng, RngCore,
7};
8use secrecy::{ExposeSecret, ExposeSecretMut, SecretBox};
9
/// The prefix identifying an age stanza.
///
/// Includes the trailing space that separates the `->` marker from the stanza's tag.
const STANZA_TAG: &str = "-> ";
12
/// The length of an age file key, in bytes.
pub const FILE_KEY_BYTES: usize = 16;
15
/// A file key for encrypting or decrypting an age file.
///
/// The key bytes are held inside a [`SecretBox`] and are only readable through the
/// [`ExposeSecret`] implementation on this type.
pub struct FileKey(SecretBox<[u8; FILE_KEY_BYTES]>);
18
19impl FileKey {
20 /// Creates a file key using a pre-boxed key.
21 pub fn new(file_key: Box<[u8; FILE_KEY_BYTES]>) -> Self {
22 Self(SecretBox::new(file_key))
23 }
24
25 /// Creates a file key using a function that can initialize the key in-place.
26 pub fn init_with_mut(ctr: impl FnOnce(&mut [u8; FILE_KEY_BYTES])) -> Self {
27 Self(SecretBox::init_with_mut(ctr))
28 }
29
30 /// Same as [`Self::init_with_mut`], but the constructor can be fallible.
31 pub fn try_init_with_mut<E>(
32 ctr: impl FnOnce(&mut [u8; FILE_KEY_BYTES]) -> Result<(), E>,
33 ) -> Result<Self, E> {
34 let mut file_key = SecretBox::new(Box::new([0; FILE_KEY_BYTES]));
35 ctr(file_key.expose_secret_mut())?;
36 Ok(Self(file_key))
37 }
38}
39
impl ExposeSecret<[u8; FILE_KEY_BYTES]> for FileKey {
    /// Returns a reference to the raw file key bytes by delegating to the inner
    /// [`SecretBox`].
    fn expose_secret(&self) -> &[u8; FILE_KEY_BYTES] {
        self.0.expose_secret()
    }
}
45
/// A section of the age header that encapsulates the file key as encrypted to a specific
/// recipient.
///
/// This is the reference type; see [`Stanza`] for the owned type. Every field borrows
/// its data for the lifetime `'a`.
#[derive(Debug)]
pub struct AgeStanza<'a> {
    /// A tag identifying this stanza type.
    pub tag: &'a str,
    /// Zero or more arguments.
    pub args: Vec<&'a str>,
    /// The body of the stanza, containing a wrapped [`FileKey`].
    ///
    /// Represented as the set of Base64-encoded lines for efficiency (so the caller can
    /// defer the cost of decoding until the structure containing this stanza has been
    /// fully-parsed).
    ///
    /// [`AgeStanza::body`] expects at least one line to be present, and every line
    /// before the last to be exactly 64 bytes long.
    body: Vec<&'a [u8]>,
}
63
64impl<'a> AgeStanza<'a> {
65 /// Decodes and returns the body of this stanza.
66 pub fn body(&self) -> Vec<u8> {
67 // An AgeStanza will always contain at least one chunk.
68 let (partial_chunk, full_chunks) = self.body.split_last().unwrap();
69
70 // This is faster than collecting from a flattened iterator.
71 let mut data = vec![0; full_chunks.len() * 64 + partial_chunk.len()];
72 for (i, chunk) in full_chunks.iter().enumerate() {
73 // These chunks are guaranteed to be full by construction.
74 data[i * 64..(i + 1) * 64].copy_from_slice(chunk);
75 }
76 data[full_chunks.len() * 64..].copy_from_slice(partial_chunk);
77
78 // The chunks are guaranteed to contain Base64 characters by construction.
79 BASE64_STANDARD_NO_PAD.decode(&data).unwrap()
80 }
81}
82
/// A section of the age header that encapsulates the file key as encrypted to a specific
/// recipient.
///
/// This is the owned type; see [`AgeStanza`] for the reference type. An [`AgeStanza`]
/// can be converted into a `Stanza` via [`From`], which decodes the body.
#[derive(Debug, PartialEq, Eq)]
pub struct Stanza {
    /// A tag identifying this stanza type.
    pub tag: String,
    /// Zero or more arguments.
    pub args: Vec<String>,
    /// The body of the stanza, containing a wrapped [`FileKey`].
    ///
    /// Unlike [`AgeStanza`], this holds the decoded bytes rather than Base64 lines.
    pub body: Vec<u8>,
}
96
97impl From<AgeStanza<'_>> for Stanza {
98 fn from(stanza: AgeStanza<'_>) -> Self {
99 let body = stanza.body();
100 Stanza {
101 tag: stanza.tag.to_string(),
102 args: stanza.args.into_iter().map(|s| s.to_string()).collect(),
103 body,
104 }
105 }
106}
107
/// Checks whether the string is a valid age "arbitrary string" (`1*VCHAR` in ABNF).
///
/// Returns `true` only when the string is non-empty and consists solely of ASCII
/// characters with values 33 to 126.
pub fn is_arbitrary_string<S: AsRef<str>>(s: &S) -> bool {
    let text: &str = s.as_ref();
    if text.is_empty() {
        return false;
    }
    // Every byte of a non-ASCII character's UTF-8 encoding is >= 128, so a byte-wise
    // check for the range 33..=126 (ASCII graphic) rejects those characters as well.
    text.bytes().all(|byte| byte.is_ascii_graphic())
}
117
118/// Creates a random recipient stanza that exercises the joint in the age v1 format.
119///
120/// This function is guaranteed to return a valid stanza, but makes no other guarantees
121/// about the stanza's fields.
122pub fn grease_the_joint() -> Stanza {
123 // Generate arbitrary strings between 1 and 9 characters long.
124 fn gen_arbitrary_string<R: RngCore>(rng: &mut R) -> String {
125 let length = Uniform::from(1..9).sample(rng);
126 Uniform::from(33..=126)
127 .sample_iter(rng)
128 .map(char::from)
129 .take(length)
130 .collect()
131 }
132
133 let mut rng = thread_rng();
134
135 // Add a suffix to the random tag so users know what is going on.
136 let tag = format!("{}-grease", gen_arbitrary_string(&mut rng));
137
138 // Between this and the above generation bounds, the first line of the recipient
139 // stanza will be between eight and 66 characters.
140 let args = (0..Uniform::from(0..5).sample(&mut rng))
141 .map(|_| gen_arbitrary_string(&mut rng))
142 .collect();
143
144 // A length between 0 and 100 bytes exercises the following stanza bodies:
145 // - Empty
146 // - Single short-line
147 // - Single full-line
148 // - Two lines, second short
149 // - Two lines, both full
150 // - Three lines, last short
151 let mut body = vec![0; Uniform::from(0..100).sample(&mut rng)];
152 rng.fill_bytes(&mut body);
153
154 Stanza { tag, args, body }
155}
156
157/// Decoding operations for age types.
158pub mod read {
159 use nom::{
160 branch::alt,
161 bytes::streaming::{tag, take_while1, take_while_m_n},
162 character::streaming::newline,
163 combinator::{map, map_opt, opt, verify},
164 multi::{many_till, separated_list1},
165 sequence::{pair, preceded, terminated},
166 IResult,
167 };
168
169 use super::{AgeStanza, STANZA_TAG};
170
171 fn is_base64_char(c: u8) -> bool {
172 // Check against the ASCII values of the standard Base64 character set.
173 matches!(
174 c,
175 // A..=Z | a..=z | 0..=9 | + | /
176 65..=90 | 97..=122 | 48..=57 | 43 | 47,
177 )
178 }
179
180 /// Returns true if the byte is one of the specific ASCII values of the standard
181 /// Base64 character set which leave trailing bits when they occur as the last
182 /// character in an encoding of length 2 mod 4.
183 fn base64_has_no_trailing_bits_2(c: &u8) -> bool {
184 // With two trailing characters, the last character has up to four trailing bits.
185 matches!(
186 c,
187 // A | Q | g | w
188 65 | 81 | 103 | 119,
189 )
190 }
191
192 /// Returns true if the byte is one of the specific ASCII values of the standard
193 /// Base64 character set which leave trailing bits when they occur as the last
194 /// character in an encoding of length 3 mod 4.
195 fn base64_has_no_trailing_bits_3(c: &u8) -> bool {
196 // With three trailing characters, the last character has up to two trailing bits.
197 matches!(
198 c,
199 // A | E | I | M | Q | U | Y | c | g | k | o | s | w | 0 | 4 | 8
200 65 | 69 | 73 | 77 | 81 | 85 | 89 | 99 | 103 | 107 | 111 | 115 | 119 | 48 | 52 | 56,
201 )
202 }
203
204 /// Reads an age "arbitrary string".
205 ///
206 /// From the age specification:
207 /// ```text
208 /// ... an arbitrary string is a sequence of ASCII characters with values 33 to 126.
209 /// ```
210 pub fn arbitrary_string(input: &[u8]) -> IResult<&[u8], &str> {
211 map(take_while1(|c| (33..=126).contains(&c)), |bytes| {
212 // Safety: ASCII bytes are valid UTF-8
213 unsafe { std::str::from_utf8_unchecked(bytes) }
214 })(input)
215 }
216
217 fn wrapped_encoded_data(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
218 map(
219 many_till(
220 // Any body lines before the last MUST be full-length.
221 terminated(take_while_m_n(64, 64, is_base64_char), newline),
222 // Last body line:
223 // - MUST be short (empty if necessary).
224 // - MUST be a valid Base64 length (i.e. the length must not be 1 mod 4).
225 // - MUST NOT leave trailing bits (if the length is 2 or 3 mod 4).
226 verify(
227 terminated(take_while_m_n(0, 63, is_base64_char), newline),
228 |line: &[u8]| match line.len() % 4 {
229 0 => true,
230 1 => false,
231 2 => base64_has_no_trailing_bits_2(line.last().unwrap()),
232 3 => base64_has_no_trailing_bits_3(line.last().unwrap()),
233 // No other cases, but Rust wants an exhaustive match on u8.
234 _ => unreachable!(),
235 },
236 ),
237 ),
238 |(full_chunks, partial_chunk): (Vec<&[u8]>, &[u8])| {
239 let mut chunks = full_chunks;
240 chunks.push(partial_chunk);
241 chunks
242 },
243 )(input)
244 }
245
246 fn legacy_wrapped_encoded_data(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
247 map_opt(
248 separated_list1(newline, take_while1(is_base64_char)),
249 |chunks: Vec<&[u8]>| {
250 // Enforce that the only chunk allowed to be shorter than 64 characters
251 // is the last chunk, and that its length must not be 1 mod 4.
252 let (partial_chunk, full_chunks) = chunks.split_last().unwrap();
253 if full_chunks.iter().any(|s| s.len() != 64)
254 || partial_chunk.len() > 64
255 || partial_chunk.len() % 4 == 1
256 || (partial_chunk.len() % 4 == 2
257 && !base64_has_no_trailing_bits_2(partial_chunk.last().unwrap()))
258 || (partial_chunk.len() % 4 == 3
259 && !base64_has_no_trailing_bits_3(partial_chunk.last().unwrap()))
260 {
261 None
262 } else {
263 Some(chunks)
264 }
265 },
266 )(input)
267 }
268
269 /// Reads an age stanza.
270 ///
271 /// From the age spec:
272 /// ```text
273 /// Each recipient stanza starts with a line beginning with -> and its type name,
274 /// followed by zero or more SP-separated arguments. The type name and the arguments
275 /// are arbitrary strings. Unknown recipient types are ignored. The rest of the
276 /// recipient stanza is a body of canonical base64 from RFC 4648 without padding
277 /// wrapped at exactly 64 columns.
278 /// ```
279 pub fn age_stanza(input: &[u8]) -> IResult<&[u8], AgeStanza<'_>> {
280 map(
281 pair(
282 preceded(
283 tag(STANZA_TAG),
284 terminated(separated_list1(tag(" "), arbitrary_string), newline),
285 ),
286 wrapped_encoded_data,
287 ),
288 |(mut args, body)| {
289 let tag = args.remove(0);
290 AgeStanza { tag, args, body }
291 },
292 )(input)
293 }
294
295 fn legacy_age_stanza_inner(input: &[u8]) -> IResult<&[u8], AgeStanza<'_>> {
296 map(
297 pair(
298 preceded(tag(STANZA_TAG), separated_list1(tag(" "), arbitrary_string)),
299 terminated(opt(preceded(newline, legacy_wrapped_encoded_data)), newline),
300 ),
301 |(mut args, body)| {
302 let tag = args.remove(0);
303 AgeStanza {
304 tag,
305 args,
306 body: body.unwrap_or_else(|| vec![&[]]),
307 }
308 },
309 )(input)
310 }
311
312 /// Reads a age stanza, allowing the legacy encoding of an body.
313 ///
314 /// From the age spec:
315 /// ```text
316 /// Each recipient stanza starts with a line beginning with -> and its type name,
317 /// followed by zero or more SP-separated arguments. The type name and the arguments
318 /// are arbitrary strings. Unknown recipient types are ignored. The rest of the
319 /// recipient stanza is a body of canonical base64 from RFC 4648 without padding
320 /// wrapped at exactly 64 columns.
321 /// ```
322 ///
323 /// The spec was originally unclear about how to encode a stanza body. Both age and
324 /// rage implemented the encoding in a way such that a stanza with a body of length of
325 /// 0 mod 64 was indistinguishable from an incomplete stanza. The spec now requires a
326 /// stanza body to always be terminated with a short line (empty if necessary). This
327 /// API exists to handle files that include the legacy encoding. The only known
328 /// generator of 0 mod 64 bodies is [`grease_the_joint`], so this should only affect
329 /// age files encrypted with beta versions of the `age` or `rage` crates.
330 ///
331 /// [`grease_the_joint`]: super::grease_the_joint
332 pub fn legacy_age_stanza(input: &[u8]) -> IResult<&[u8], AgeStanza<'_>> {
333 alt((age_stanza, legacy_age_stanza_inner))(input)
334 }
335
    #[cfg(test)]
    mod tests {
        use super::*;

        /// Checks that body lines containing the Base64 padding character `=` are
        /// rejected, while the same lines without the padding character are accepted.
        #[test]
        fn base64_padding_rejected() {
            // Trailing padding is rejected.
            assert!(wrapped_encoded_data(b"Tm8gcGFkZGluZyE\n").is_ok());
            assert!(wrapped_encoded_data(b"Tm8gcGFkZGluZyE=\n").is_err());
            // Internal padding is also rejected.
            assert!(wrapped_encoded_data(b"SW50ZXJuYWwUGFk\n").is_ok());
            assert!(wrapped_encoded_data(b"SW50ZXJuYWw=UGFk\n").is_err());
        }
    }
349}
350
351/// Encoding operations for age types.
352pub mod write {
353 use base64::{prelude::BASE64_STANDARD_NO_PAD, Engine};
354 use cookie_factory::{
355 combinator::string,
356 multi::separated_list,
357 sequence::{pair, tuple},
358 SerializeFn, WriteContext,
359 };
360 use std::io::Write;
361 use std::iter;
362
363 use super::STANZA_TAG;
364
365 fn wrapped_encoded_data<'a, W: 'a + Write>(data: &[u8]) -> impl SerializeFn<W> + 'a {
366 let encoded = BASE64_STANDARD_NO_PAD.encode(data);
367
368 move |mut w: WriteContext<W>| {
369 let mut s = encoded.as_str();
370
371 // Write full body lines.
372 while s.len() >= 64 {
373 let (l, r) = s.split_at(64);
374 w = pair(string(l), string("\n"))(w)?;
375 s = r;
376 }
377
378 // Last body line MUST be short (empty if necessary).
379 pair(string(s), string("\n"))(w)
380 }
381 }
382
383 /// Writes an age stanza.
384 pub fn age_stanza<'a, W: 'a + Write, S: AsRef<str>>(
385 tag: &'a str,
386 args: &'a [S],
387 body: &'a [u8],
388 ) -> impl SerializeFn<W> + 'a {
389 pair(
390 tuple((
391 string(STANZA_TAG),
392 separated_list(
393 string(" "),
394 iter::once(tag)
395 .chain(args.iter().map(|s| s.as_ref()))
396 .map(string),
397 ),
398 string("\n"),
399 )),
400 wrapped_encoded_data(body),
401 )
402 }
403}
404
405#[cfg(test)]
406mod tests {
407 use base64::{prelude::BASE64_STANDARD_NO_PAD, Engine};
408 use nom::error::ErrorKind;
409
410 use super::{read, write};
411
    /// Parses a stanza whose body is a single short line, then serializes the same
    /// tag, arguments and body and checks the output matches the original text.
    #[test]
    fn parse_age_stanza() {
        let test_tag = "X25519";
        let test_args = &["CJM36AHmTbdHSuOQL+NESqyVQE75f2e610iRdLPEN20"];
        let test_body = BASE64_STANDARD_NO_PAD
            .decode("C3ZAeY64NXS4QFrksLm3EGz+uPRyI0eQsWw7LWbbYig")
            .unwrap();

        // The only body line is short, so we don't need a trailing empty line.
        let test_stanza = "-> X25519 CJM36AHmTbdHSuOQL+NESqyVQE75f2e610iRdLPEN20
C3ZAeY64NXS4QFrksLm3EGz+uPRyI0eQsWw7LWbbYig
";

        // Reading: the parsed fields must match the expected values.
        let (_, stanza) = read::age_stanza(test_stanza.as_bytes()).unwrap();
        assert_eq!(stanza.tag, test_tag);
        assert_eq!(stanza.args, test_args);
        assert_eq!(stanza.body(), test_body);

        // Writing: serializing the same fields must reproduce the input exactly.
        let mut buf = vec![];
        cookie_factory::gen_simple(write::age_stanza(test_tag, test_args, &test_body), &mut buf)
            .unwrap();
        assert_eq!(buf, test_stanza.as_bytes());
    }
435
    /// Round-trips a stanza with an empty body, which is encoded as a single empty
    /// line after the first line.
    #[test]
    fn age_stanza_with_empty_body() {
        let test_tag = "empty-body";
        let test_args = &["some", "arguments"];
        let test_body = &[];

        // The body is empty, so it is represented with an empty line.
        let test_stanza = "-> empty-body some arguments

";

        // Reading: the parsed fields must match the expected values.
        let (_, stanza) = read::age_stanza(test_stanza.as_bytes()).unwrap();
        assert_eq!(stanza.tag, test_tag);
        assert_eq!(stanza.args, test_args);
        assert_eq!(stanza.body(), test_body);

        // Writing: serializing the same fields must reproduce the input exactly.
        let mut buf = vec![];
        cookie_factory::gen_simple(write::age_stanza(test_tag, test_args, test_body), &mut buf)
            .unwrap();
        assert_eq!(buf, test_stanza.as_bytes());
    }
457
    /// Round-trips a stanza whose body encodes to exactly one full 64-character line,
    /// which must be followed by an empty line.
    #[test]
    fn age_stanza_with_full_body() {
        let test_tag = "full-body";
        let test_args = &["some", "arguments"];
        let test_body = BASE64_STANDARD_NO_PAD
            .decode("xD7o4VEOu1t7KZQ1gDgq2FPzBEeSRqbnqvQEXdLRYy143BxR6oFxsUUJCRB0ErXA")
            .unwrap();

        // The body fills a complete line, so it requires a trailing empty line.
        let test_stanza = "-> full-body some arguments
xD7o4VEOu1t7KZQ1gDgq2FPzBEeSRqbnqvQEXdLRYy143BxR6oFxsUUJCRB0ErXA

";

        // Reading: the parsed fields must match the expected values.
        let (_, stanza) = read::age_stanza(test_stanza.as_bytes()).unwrap();
        assert_eq!(stanza.tag, test_tag);
        assert_eq!(stanza.args, test_args);
        assert_eq!(stanza.body(), test_body);

        // Writing: serializing the same fields must reproduce the input exactly.
        let mut buf = vec![];
        cookie_factory::gen_simple(write::age_stanza(test_tag, test_args, &test_body), &mut buf)
            .unwrap();
        assert_eq!(buf, test_stanza.as_bytes());
    }
482
    /// Checks that a full-line body without the trailing empty line is rejected by
    /// the standard parser but accepted by the legacy parser.
    #[test]
    fn age_stanza_with_legacy_full_body() {
        let test_tag = "full-body";
        let test_args = &["some", "arguments"];
        let test_body = BASE64_STANDARD_NO_PAD
            .decode("xD7o4VEOu1t7KZQ1gDgq2FPzBEeSRqbnqvQEXdLRYy143BxR6oFxsUUJCRB0ErXA")
            .unwrap();

        // The body fills a complete line, but lacks a trailing empty line.
        let test_stanza = "-> full-body some arguments
xD7o4VEOu1t7KZQ1gDgq2FPzBEeSRqbnqvQEXdLRYy143BxR6oFxsUUJCRB0ErXA
--- header end
";

        // The normal parser returns an error.
        assert!(read::age_stanza(test_stanza.as_bytes()).is_err());

        // We can parse with the legacy parser.
        let (_, stanza) = read::legacy_age_stanza(test_stanza.as_bytes()).unwrap();
        assert_eq!(stanza.tag, test_tag);
        assert_eq!(stanza.args, test_args);
        assert_eq!(stanza.body(), test_body);
    }
506
    /// Regression test for a fuzzer artifact whose last body line has an invalid
    /// Base64 length (1 mod 4).
    #[test]
    fn age_stanza_invalid_last_line() {
        // Artifact found by cargo-fuzz on commit 81f91581bf7e21075519dc23e4a28b4d201dd784
        // We add an extra newline to the artifact so that we would "correctly" trigger
        // the bug in the legacy part of `read::legacy_age_stanza`.
        let artifact = "-> H
/

";

        // The stanza parser requires the last body line is short (possibly empty), so
        // should reject this artifact.
        match read::age_stanza(artifact.as_bytes()) {
            Err(nom::Err::Error(e)) => assert_eq!(e.code, ErrorKind::TakeWhileMN),
            Err(e) => panic!("Unexpected error: {}", e),
            Ok((rest, stanza)) => {
                assert_eq!(rest, b"\n");
                // This is where the fuzzer triggered a panic.
                let _ = stanza.body();
                // We should never reach here either before or after the bug was fixed,
                // because the body length is invalid.
                panic!("Invalid test case was parsed without error");
            }
        }

        // The legacy parser accepts this artifact by ignoring the invalid body line,
        // because bodies were allowed to be omitted.
        let (rest, stanza) = read::legacy_age_stanza(artifact.as_bytes()).unwrap();
        // The remainder should be the invalid body line. If the standard parser were
        // fixed but the legacy parser was not, this would only contain a single newline.
        assert_eq!(rest, b"/\n\n");
        // This is where the fuzzer would have triggered a panic if it were using the
        // legacy parser.
        let body = stanza.body();
        assert!(body.is_empty());
    }
543
    /// Regression test for a fuzzer artifact whose last body line has length 2 mod 4
    /// and ends in a character that leaves trailing bits.
    #[test]
    fn age_stanza_last_line_two_trailing_chars() {
        // Artifact found by cargo-fuzz on commit 8da15148fc005a48ffeb43eb76dab478eb2fdf72
        // We add an extra newline to the artifact so that we would "correctly" trigger
        // the bug in the legacy part of `read::legacy_age_stanza`.
        let artifact = "-> '
dy

";

        // The stanza parser requires the last body line is short (possibly empty), so
        // should reject this artifact.
        match read::age_stanza(artifact.as_bytes()) {
            Err(nom::Err::Error(e)) => assert_eq!(e.code, ErrorKind::TakeWhileMN),
            Err(e) => panic!("Unexpected error: {}", e),
            Ok((rest, stanza)) => {
                assert_eq!(rest, b"\n");
                // This is where the fuzzer triggered a panic.
                let _ = stanza.body();
                // We should never reach here either before or after the bug was fixed,
                // because the last body line has trailing bits.
                panic!("Invalid test case was parsed without error");
            }
        }

        // The legacy parser accepts this artifact by ignoring the invalid body line,
        // because bodies were allowed to be omitted.
        let (rest, stanza) = read::legacy_age_stanza(artifact.as_bytes()).unwrap();
        // The remainder should be the invalid body line. If the standard parser were
        // fixed but the legacy parser was not, this would only contain a single newline.
        assert_eq!(rest, b"dy\n\n");
        // This is where the fuzzer would have triggered a panic if it were using the
        // legacy parser.
        let body = stanza.body();
        assert!(body.is_empty());
    }
580
    /// Regression test for a fuzzer artifact whose last body line has length 3 mod 4
    /// and ends in a character that leaves trailing bits.
    #[test]
    fn age_stanza_last_line_three_trailing_chars() {
        // Artifact found by cargo-fuzz after age_stanza_last_line_two_trailing_chars was
        // incorrectly fixed.
        let artifact = "-> h
ddd

";

        // The stanza parser requires the last body line is short (possibly empty), so
        // should reject this artifact.
        match read::age_stanza(artifact.as_bytes()) {
            Err(nom::Err::Error(e)) => assert_eq!(e.code, ErrorKind::TakeWhileMN),
            Err(e) => panic!("Unexpected error: {}", e),
            Ok((rest, stanza)) => {
                assert_eq!(rest, b"\n");
                // This is where the fuzzer triggered a panic.
                let _ = stanza.body();
                // We should never reach here either before or after the bug was fixed,
                // because the last body line has trailing bits.
                panic!("Invalid test case was parsed without error");
            }
        }

        // The legacy parser accepts this artifact by ignoring the invalid body line,
        // because bodies were allowed to be omitted.
        let (rest, stanza) = read::legacy_age_stanza(artifact.as_bytes()).unwrap();
        // The remainder should be the invalid body line. If the standard parser were
        // fixed but the legacy parser was not, this would only contain a single newline.
        assert_eq!(rest, b"ddd\n\n");
        // This is where the fuzzer would have triggered a panic if it were using the
        // legacy parser.
        let body = stanza.body();
        assert!(body.is_empty());
    }
615 }
616}