age_core/
format.rs

1//! Core types and encoding operations used by the age file format.
2
3use base64::{prelude::BASE64_STANDARD_NO_PAD, Engine};
4use rand::{
5    distributions::{Distribution, Uniform},
6    thread_rng, RngCore,
7};
8use secrecy::{ExposeSecret, ExposeSecretMut, SecretBox};
9
/// The prefix that opens the first line of every age stanza (an arrow followed by a
/// single space).
const STANZA_TAG: &str = "-> ";

/// The length of an age file key, in bytes.
pub const FILE_KEY_BYTES: usize = 16;
15
16/// A file key for encrypting or decrypting an age file.
17pub struct FileKey(SecretBox<[u8; FILE_KEY_BYTES]>);
18
19impl FileKey {
20    /// Creates a file key using a pre-boxed key.
21    pub fn new(file_key: Box<[u8; FILE_KEY_BYTES]>) -> Self {
22        Self(SecretBox::new(file_key))
23    }
24
25    /// Creates a file key using a function that can initialize the key in-place.
26    pub fn init_with_mut(ctr: impl FnOnce(&mut [u8; FILE_KEY_BYTES])) -> Self {
27        Self(SecretBox::init_with_mut(ctr))
28    }
29
30    /// Same as [`Self::init_with_mut`], but the constructor can be fallible.
31    pub fn try_init_with_mut<E>(
32        ctr: impl FnOnce(&mut [u8; FILE_KEY_BYTES]) -> Result<(), E>,
33    ) -> Result<Self, E> {
34        let mut file_key = SecretBox::new(Box::new([0; FILE_KEY_BYTES]));
35        ctr(file_key.expose_secret_mut())?;
36        Ok(Self(file_key))
37    }
38}
39
40impl ExposeSecret<[u8; FILE_KEY_BYTES]> for FileKey {
41    fn expose_secret(&self) -> &[u8; FILE_KEY_BYTES] {
42        self.0.expose_secret()
43    }
44}
45
46/// A section of the age header that encapsulates the file key as encrypted to a specific
47/// recipient.
48///
49/// This is the reference type; see [`Stanza`] for the owned type.
50#[derive(Debug)]
51pub struct AgeStanza<'a> {
52    /// A tag identifying this stanza type.
53    pub tag: &'a str,
54    /// Zero or more arguments.
55    pub args: Vec<&'a str>,
56    /// The body of the stanza, containing a wrapped [`FileKey`].
57    ///
58    /// Represented as the set of Base64-encoded lines for efficiency (so the caller can
59    /// defer the cost of decoding until the structure containing this stanza has been
60    /// fully-parsed).
61    body: Vec<&'a [u8]>,
62}
63
64impl<'a> AgeStanza<'a> {
65    /// Decodes and returns the body of this stanza.
66    pub fn body(&self) -> Vec<u8> {
67        // An AgeStanza will always contain at least one chunk.
68        let (partial_chunk, full_chunks) = self.body.split_last().unwrap();
69
70        // This is faster than collecting from a flattened iterator.
71        let mut data = vec![0; full_chunks.len() * 64 + partial_chunk.len()];
72        for (i, chunk) in full_chunks.iter().enumerate() {
73            // These chunks are guaranteed to be full by construction.
74            data[i * 64..(i + 1) * 64].copy_from_slice(chunk);
75        }
76        data[full_chunks.len() * 64..].copy_from_slice(partial_chunk);
77
78        // The chunks are guaranteed to contain Base64 characters by construction.
79        BASE64_STANDARD_NO_PAD.decode(&data).unwrap()
80    }
81}
82
83/// A section of the age header that encapsulates the file key as encrypted to a specific
84/// recipient.
85///
86/// This is the owned type; see [`AgeStanza`] for the reference type.
87#[derive(Debug, PartialEq, Eq)]
88pub struct Stanza {
89    /// A tag identifying this stanza type.
90    pub tag: String,
91    /// Zero or more arguments.
92    pub args: Vec<String>,
93    /// The body of the stanza, containing a wrapped [`FileKey`].
94    pub body: Vec<u8>,
95}
96
97impl From<AgeStanza<'_>> for Stanza {
98    fn from(stanza: AgeStanza<'_>) -> Self {
99        let body = stanza.body();
100        Stanza {
101            tag: stanza.tag.to_string(),
102            args: stanza.args.into_iter().map(|s| s.to_string()).collect(),
103            body,
104        }
105    }
106}
107
/// Checks whether the string is a valid age "arbitrary string" (`1*VCHAR` in ABNF).
///
/// Returns `true` only if `s` is non-empty and every character is an ASCII character
/// with a value from 33 (`!`) to 126 (`~`) inclusive.
///
/// `S` may be unsized, so both `is_arbitrary_string("tag")` and
/// `is_arbitrary_string(&owned_string)` are accepted.
pub fn is_arbitrary_string<S: AsRef<str> + ?Sized>(s: &S) -> bool {
    let s = s.as_ref();
    // Scanning bytes is sufficient: every byte of a multi-byte UTF-8 sequence is
    // >= 0x80, and `is_ascii_graphic` accepts exactly the bytes 0x21..=0x7E.
    !s.is_empty() && s.bytes().all(|b| b.is_ascii_graphic())
}
117
118/// Creates a random recipient stanza that exercises the joint in the age v1 format.
119///
120/// This function is guaranteed to return a valid stanza, but makes no other guarantees
121/// about the stanza's fields.
122pub fn grease_the_joint() -> Stanza {
123    // Generate arbitrary strings between 1 and 9 characters long.
124    fn gen_arbitrary_string<R: RngCore>(rng: &mut R) -> String {
125        let length = Uniform::from(1..9).sample(rng);
126        Uniform::from(33..=126)
127            .sample_iter(rng)
128            .map(char::from)
129            .take(length)
130            .collect()
131    }
132
133    let mut rng = thread_rng();
134
135    // Add a suffix to the random tag so users know what is going on.
136    let tag = format!("{}-grease", gen_arbitrary_string(&mut rng));
137
138    // Between this and the above generation bounds, the first line of the recipient
139    // stanza will be between eight and 66 characters.
140    let args = (0..Uniform::from(0..5).sample(&mut rng))
141        .map(|_| gen_arbitrary_string(&mut rng))
142        .collect();
143
144    // A length between 0 and 100 bytes exercises the following stanza bodies:
145    // - Empty
146    // - Single short-line
147    // - Single full-line
148    // - Two lines, second short
149    // - Two lines, both full
150    // - Three lines, last short
151    let mut body = vec![0; Uniform::from(0..100).sample(&mut rng)];
152    rng.fill_bytes(&mut body);
153
154    Stanza { tag, args, body }
155}
156
157/// Decoding operations for age types.
158pub mod read {
159    use nom::{
160        branch::alt,
161        bytes::streaming::{tag, take_while1, take_while_m_n},
162        character::streaming::newline,
163        combinator::{map, map_opt, opt, verify},
164        multi::{many_till, separated_list1},
165        sequence::{pair, preceded, terminated},
166        IResult,
167    };
168
169    use super::{AgeStanza, STANZA_TAG};
170
171    fn is_base64_char(c: u8) -> bool {
172        // Check against the ASCII values of the standard Base64 character set.
173        matches!(
174            c,
175            // A..=Z | a..=z | 0..=9 | + | /
176            65..=90 | 97..=122 | 48..=57 | 43 | 47,
177        )
178    }
179
180    /// Returns true if the byte is one of the specific ASCII values of the standard
181    /// Base64 character set which leave trailing bits when they occur as the last
182    /// character in an encoding of length 2 mod 4.
183    fn base64_has_no_trailing_bits_2(c: &u8) -> bool {
184        // With two trailing characters, the last character has up to four trailing bits.
185        matches!(
186            c,
187            // A | Q | g | w
188            65 | 81 | 103 | 119,
189        )
190    }
191
192    /// Returns true if the byte is one of the specific ASCII values of the standard
193    /// Base64 character set which leave trailing bits when they occur as the last
194    /// character in an encoding of length 3 mod 4.
195    fn base64_has_no_trailing_bits_3(c: &u8) -> bool {
196        // With three trailing characters, the last character has up to two trailing bits.
197        matches!(
198            c,
199            // A | E | I | M | Q | U | Y | c | g | k | o | s | w | 0 | 4 | 8
200            65 | 69 | 73 | 77 | 81 | 85 | 89 | 99 | 103 | 107 | 111 | 115 | 119 | 48 | 52 | 56,
201        )
202    }
203
204    /// Reads an age "arbitrary string".
205    ///
206    /// From the age specification:
207    /// ```text
208    /// ... an arbitrary string is a sequence of ASCII characters with values 33 to 126.
209    /// ```
210    pub fn arbitrary_string(input: &[u8]) -> IResult<&[u8], &str> {
211        map(take_while1(|c| (33..=126).contains(&c)), |bytes| {
212            // Safety: ASCII bytes are valid UTF-8
213            unsafe { std::str::from_utf8_unchecked(bytes) }
214        })(input)
215    }
216
217    fn wrapped_encoded_data(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
218        map(
219            many_till(
220                // Any body lines before the last MUST be full-length.
221                terminated(take_while_m_n(64, 64, is_base64_char), newline),
222                // Last body line:
223                // - MUST be short (empty if necessary).
224                // - MUST be a valid Base64 length (i.e. the length must not be 1 mod 4).
225                // - MUST NOT leave trailing bits (if the length is 2 or 3 mod 4).
226                verify(
227                    terminated(take_while_m_n(0, 63, is_base64_char), newline),
228                    |line: &[u8]| match line.len() % 4 {
229                        0 => true,
230                        1 => false,
231                        2 => base64_has_no_trailing_bits_2(line.last().unwrap()),
232                        3 => base64_has_no_trailing_bits_3(line.last().unwrap()),
233                        // No other cases, but Rust wants an exhaustive match on u8.
234                        _ => unreachable!(),
235                    },
236                ),
237            ),
238            |(full_chunks, partial_chunk): (Vec<&[u8]>, &[u8])| {
239                let mut chunks = full_chunks;
240                chunks.push(partial_chunk);
241                chunks
242            },
243        )(input)
244    }
245
246    fn legacy_wrapped_encoded_data(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
247        map_opt(
248            separated_list1(newline, take_while1(is_base64_char)),
249            |chunks: Vec<&[u8]>| {
250                // Enforce that the only chunk allowed to be shorter than 64 characters
251                // is the last chunk, and that its length must not be 1 mod 4.
252                let (partial_chunk, full_chunks) = chunks.split_last().unwrap();
253                if full_chunks.iter().any(|s| s.len() != 64)
254                    || partial_chunk.len() > 64
255                    || partial_chunk.len() % 4 == 1
256                    || (partial_chunk.len() % 4 == 2
257                        && !base64_has_no_trailing_bits_2(partial_chunk.last().unwrap()))
258                    || (partial_chunk.len() % 4 == 3
259                        && !base64_has_no_trailing_bits_3(partial_chunk.last().unwrap()))
260                {
261                    None
262                } else {
263                    Some(chunks)
264                }
265            },
266        )(input)
267    }
268
269    /// Reads an age stanza.
270    ///
271    /// From the age spec:
272    /// ```text
273    /// Each recipient stanza starts with a line beginning with -> and its type name,
274    /// followed by zero or more SP-separated arguments. The type name and the arguments
275    /// are arbitrary strings. Unknown recipient types are ignored. The rest of the
276    /// recipient stanza is a body of canonical base64 from RFC 4648 without padding
277    /// wrapped at exactly 64 columns.
278    /// ```
279    pub fn age_stanza(input: &[u8]) -> IResult<&[u8], AgeStanza<'_>> {
280        map(
281            pair(
282                preceded(
283                    tag(STANZA_TAG),
284                    terminated(separated_list1(tag(" "), arbitrary_string), newline),
285                ),
286                wrapped_encoded_data,
287            ),
288            |(mut args, body)| {
289                let tag = args.remove(0);
290                AgeStanza { tag, args, body }
291            },
292        )(input)
293    }
294
295    fn legacy_age_stanza_inner(input: &[u8]) -> IResult<&[u8], AgeStanza<'_>> {
296        map(
297            pair(
298                preceded(tag(STANZA_TAG), separated_list1(tag(" "), arbitrary_string)),
299                terminated(opt(preceded(newline, legacy_wrapped_encoded_data)), newline),
300            ),
301            |(mut args, body)| {
302                let tag = args.remove(0);
303                AgeStanza {
304                    tag,
305                    args,
306                    body: body.unwrap_or_else(|| vec![&[]]),
307                }
308            },
309        )(input)
310    }
311
312    /// Reads a age stanza, allowing the legacy encoding of an body.
313    ///
314    /// From the age spec:
315    /// ```text
316    /// Each recipient stanza starts with a line beginning with -> and its type name,
317    /// followed by zero or more SP-separated arguments. The type name and the arguments
318    /// are arbitrary strings. Unknown recipient types are ignored. The rest of the
319    /// recipient stanza is a body of canonical base64 from RFC 4648 without padding
320    /// wrapped at exactly 64 columns.
321    /// ```
322    ///
323    /// The spec was originally unclear about how to encode a stanza body. Both age and
324    /// rage implemented the encoding in a way such that a stanza with a body of length of
325    /// 0 mod 64 was indistinguishable from an incomplete stanza. The spec now requires a
326    /// stanza body to always be terminated with a short line (empty if necessary). This
327    /// API exists to handle files that include the legacy encoding. The only known
328    /// generator of 0 mod 64 bodies is [`grease_the_joint`], so this should only affect
329    /// age files encrypted with beta versions of the `age` or `rage` crates.
330    ///
331    /// [`grease_the_joint`]: super::grease_the_joint
332    pub fn legacy_age_stanza(input: &[u8]) -> IResult<&[u8], AgeStanza<'_>> {
333        alt((age_stanza, legacy_age_stanza_inner))(input)
334    }
335
336    #[cfg(test)]
337    mod tests {
338        use super::*;
339
340        #[test]
341        fn base64_padding_rejected() {
342            assert!(wrapped_encoded_data(b"Tm8gcGFkZGluZyE\n").is_ok());
343            assert!(wrapped_encoded_data(b"Tm8gcGFkZGluZyE=\n").is_err());
344            // Internal padding is also rejected.
345            assert!(wrapped_encoded_data(b"SW50ZXJuYWwUGFk\n").is_ok());
346            assert!(wrapped_encoded_data(b"SW50ZXJuYWw=UGFk\n").is_err());
347        }
348    }
349}
350
351/// Encoding operations for age types.
352pub mod write {
353    use base64::{prelude::BASE64_STANDARD_NO_PAD, Engine};
354    use cookie_factory::{
355        combinator::string,
356        multi::separated_list,
357        sequence::{pair, tuple},
358        SerializeFn, WriteContext,
359    };
360    use std::io::Write;
361    use std::iter;
362
363    use super::STANZA_TAG;
364
365    fn wrapped_encoded_data<'a, W: 'a + Write>(data: &[u8]) -> impl SerializeFn<W> + 'a {
366        let encoded = BASE64_STANDARD_NO_PAD.encode(data);
367
368        move |mut w: WriteContext<W>| {
369            let mut s = encoded.as_str();
370
371            // Write full body lines.
372            while s.len() >= 64 {
373                let (l, r) = s.split_at(64);
374                w = pair(string(l), string("\n"))(w)?;
375                s = r;
376            }
377
378            // Last body line MUST be short (empty if necessary).
379            pair(string(s), string("\n"))(w)
380        }
381    }
382
383    /// Writes an age stanza.
384    pub fn age_stanza<'a, W: 'a + Write, S: AsRef<str>>(
385        tag: &'a str,
386        args: &'a [S],
387        body: &'a [u8],
388    ) -> impl SerializeFn<W> + 'a {
389        pair(
390            tuple((
391                string(STANZA_TAG),
392                separated_list(
393                    string(" "),
394                    iter::once(tag)
395                        .chain(args.iter().map(|s| s.as_ref()))
396                        .map(string),
397                ),
398                string("\n"),
399            )),
400            wrapped_encoded_data(body),
401        )
402    }
403}
404
405#[cfg(test)]
406mod tests {
407    use base64::{prelude::BASE64_STANDARD_NO_PAD, Engine};
408    use nom::error::ErrorKind;
409
410    use super::{read, write};
411
412    #[test]
413    fn parse_age_stanza() {
414        let test_tag = "X25519";
415        let test_args = &["CJM36AHmTbdHSuOQL+NESqyVQE75f2e610iRdLPEN20"];
416        let test_body = BASE64_STANDARD_NO_PAD
417            .decode("C3ZAeY64NXS4QFrksLm3EGz+uPRyI0eQsWw7LWbbYig")
418            .unwrap();
419
420        // The only body line is short, so we don't need a trailing empty line.
421        let test_stanza = "-> X25519 CJM36AHmTbdHSuOQL+NESqyVQE75f2e610iRdLPEN20
422C3ZAeY64NXS4QFrksLm3EGz+uPRyI0eQsWw7LWbbYig
423";
424
425        let (_, stanza) = read::age_stanza(test_stanza.as_bytes()).unwrap();
426        assert_eq!(stanza.tag, test_tag);
427        assert_eq!(stanza.args, test_args);
428        assert_eq!(stanza.body(), test_body);
429
430        let mut buf = vec![];
431        cookie_factory::gen_simple(write::age_stanza(test_tag, test_args, &test_body), &mut buf)
432            .unwrap();
433        assert_eq!(buf, test_stanza.as_bytes());
434    }
435
436    #[test]
437    fn age_stanza_with_empty_body() {
438        let test_tag = "empty-body";
439        let test_args = &["some", "arguments"];
440        let test_body = &[];
441
442        // The body is empty, so it is represented with an empty line.
443        let test_stanza = "-> empty-body some arguments
444
445";
446
447        let (_, stanza) = read::age_stanza(test_stanza.as_bytes()).unwrap();
448        assert_eq!(stanza.tag, test_tag);
449        assert_eq!(stanza.args, test_args);
450        assert_eq!(stanza.body(), test_body);
451
452        let mut buf = vec![];
453        cookie_factory::gen_simple(write::age_stanza(test_tag, test_args, test_body), &mut buf)
454            .unwrap();
455        assert_eq!(buf, test_stanza.as_bytes());
456    }
457
458    #[test]
459    fn age_stanza_with_full_body() {
460        let test_tag = "full-body";
461        let test_args = &["some", "arguments"];
462        let test_body = BASE64_STANDARD_NO_PAD
463            .decode("xD7o4VEOu1t7KZQ1gDgq2FPzBEeSRqbnqvQEXdLRYy143BxR6oFxsUUJCRB0ErXA")
464            .unwrap();
465
466        // The body fills a complete line, so it requires a trailing empty line.
467        let test_stanza = "-> full-body some arguments
468xD7o4VEOu1t7KZQ1gDgq2FPzBEeSRqbnqvQEXdLRYy143BxR6oFxsUUJCRB0ErXA
469
470";
471
472        let (_, stanza) = read::age_stanza(test_stanza.as_bytes()).unwrap();
473        assert_eq!(stanza.tag, test_tag);
474        assert_eq!(stanza.args, test_args);
475        assert_eq!(stanza.body(), test_body);
476
477        let mut buf = vec![];
478        cookie_factory::gen_simple(write::age_stanza(test_tag, test_args, &test_body), &mut buf)
479            .unwrap();
480        assert_eq!(buf, test_stanza.as_bytes());
481    }
482
483    #[test]
484    fn age_stanza_with_legacy_full_body() {
485        let test_tag = "full-body";
486        let test_args = &["some", "arguments"];
487        let test_body = BASE64_STANDARD_NO_PAD
488            .decode("xD7o4VEOu1t7KZQ1gDgq2FPzBEeSRqbnqvQEXdLRYy143BxR6oFxsUUJCRB0ErXA")
489            .unwrap();
490
491        // The body fills a complete line, but lacks a trailing empty line.
492        let test_stanza = "-> full-body some arguments
493xD7o4VEOu1t7KZQ1gDgq2FPzBEeSRqbnqvQEXdLRYy143BxR6oFxsUUJCRB0ErXA
494--- header end
495";
496
497        // The normal parser returns an error.
498        assert!(read::age_stanza(test_stanza.as_bytes()).is_err());
499
500        // We can parse with the legacy parser
501        let (_, stanza) = read::legacy_age_stanza(test_stanza.as_bytes()).unwrap();
502        assert_eq!(stanza.tag, test_tag);
503        assert_eq!(stanza.args, test_args);
504        assert_eq!(stanza.body(), test_body);
505    }
506
507    #[test]
508    fn age_stanza_invalid_last_line() {
509        // Artifact found by cargo-fuzz on commit 81f91581bf7e21075519dc23e4a28b4d201dd784
510        // We add an extra newline to the artifact so that we would "correctly" trigger
511        // the bug in the legacy part of `read::legacy_age_stanza`.
512        let artifact = "-> H
513/
514
515";
516
517        // The stanza parser requires the last body line is short (possibly empty), so
518        // should reject this artifact.
519        match read::age_stanza(artifact.as_bytes()) {
520            Err(nom::Err::Error(e)) => assert_eq!(e.code, ErrorKind::TakeWhileMN),
521            Err(e) => panic!("Unexpected error: {}", e),
522            Ok((rest, stanza)) => {
523                assert_eq!(rest, b"\n");
524                // This is where the fuzzer triggered a panic.
525                let _ = stanza.body();
526                // We should never reach here either before or after the bug was fixed,
527                // because the body length is invalid.
528                panic!("Invalid test case was parsed without error");
529            }
530        }
531
532        // The legacy parser accepts this artifact by ignoring the invalid body line,
533        // because bodies were allowed to be omitted.
534        let (rest, stanza) = read::legacy_age_stanza(artifact.as_bytes()).unwrap();
535        // The remainder should the invalid body line. If the standard parser were fixed
536        // but the legacy parser was not, this would only contain a single newline.
537        assert_eq!(rest, b"/\n\n");
538        // This is where the fuzzer would have triggered a panic if it were using the
539        // legacy parser.
540        let body = stanza.body();
541        assert!(body.is_empty());
542    }
543
544    #[test]
545    fn age_stanza_last_line_two_trailing_chars() {
546        // Artifact found by cargo-fuzz on commit 8da15148fc005a48ffeb43eb76dab478eb2fdf72
547        // We add an extra newline to the artifact so that we would "correctly" trigger
548        // the bug in the legacy part of `read::legacy_age_stanza`.
549        let artifact = "-> '
550dy
551
552";
553
554        // The stanza parser requires the last body line is short (possibly empty), so
555        // should reject this artifact.
556        match read::age_stanza(artifact.as_bytes()) {
557            Err(nom::Err::Error(e)) => assert_eq!(e.code, ErrorKind::TakeWhileMN),
558            Err(e) => panic!("Unexpected error: {}", e),
559            Ok((rest, stanza)) => {
560                assert_eq!(rest, b"\n");
561                // This is where the fuzzer triggered a panic.
562                let _ = stanza.body();
563                // We should never reach here either before or after the bug was fixed,
564                // because the last body line has trailing bits.
565                panic!("Invalid test case was parsed without error");
566            }
567        }
568
569        // The legacy parser accepts this artifact by ignoring the invalid body line,
570        // because bodies were allowed to be omitted.
571        let (rest, stanza) = read::legacy_age_stanza(artifact.as_bytes()).unwrap();
572        // The remainder should the invalid body line. If the standard parser were fixed
573        // but the legacy parser was not, this would only contain a single newline.
574        assert_eq!(rest, b"dy\n\n");
575        // This is where the fuzzer would have triggered a panic if it were using the
576        // legacy parser.
577        let body = stanza.body();
578        assert!(body.is_empty());
579    }
580
581    #[test]
582    fn age_stanza_last_line_three_trailing_chars() {
583        // Artifact found by cargo-fuzz after age_stanza_last_line_two_trailing_chars was
584        // incorrectly fixed.
585        let artifact = "-> h
586ddd
587
588";
589
590        // The stanza parser requires the last body line is short (possibly empty), so
591        // should reject this artifact.
592        match read::age_stanza(artifact.as_bytes()) {
593            Err(nom::Err::Error(e)) => assert_eq!(e.code, ErrorKind::TakeWhileMN),
594            Err(e) => panic!("Unexpected error: {}", e),
595            Ok((rest, stanza)) => {
596                assert_eq!(rest, b"\n");
597                // This is where the fuzzer triggered a panic.
598                let _ = stanza.body();
599                // We should never reach here either before or after the bug was fixed,
600                // because the last body line has trailing bits.
601                panic!("Invalid test case was parsed without error");
602            }
603        }
604
605        // The legacy parser accepts this artifact by ignoring the invalid body line,
606        // because bodies were allowed to be omitted.
607        let (rest, stanza) = read::legacy_age_stanza(artifact.as_bytes()).unwrap();
608        // The remainder should the invalid body line. If the standard parser were fixed
609        // but the legacy parser was not, this would only contain a single newline.
610        assert_eq!(rest, b"ddd\n\n");
611        // This is where the fuzzer would have triggered a panic if it were using the
612        // legacy parser.
613        let body = stanza.body();
614        assert!(body.is_empty());
615    }
616}