// synta-certificate 0.2.6
//
// X.509 certificate structures for the synta ASN.1 library.
// Documentation
/// Parse all PEM blocks from `pem`, returning each block's label and DER bytes.
///
/// Each entry is `(label, der)` where `label` is the text between `-----BEGIN `
/// and `-----` (e.g. `"CERTIFICATE"`, `"PRIVATE KEY"`, `"CERTIFICATE REQUEST"`).
///
/// Handles multiple mixed-type blocks, CRLF line endings, and arbitrary text
/// between blocks.  ASCII whitespace inside the base64 body (trailing blanks,
/// tabs, stray carriage returns) is ignored.
///
/// The following are silently skipped and produce no entry:
///
/// * blocks whose body is not valid base64;
/// * BEGIN header lines that do not end in `-----` or whose label is not
///   valid UTF-8;
/// * blocks with no `-----END` line.  If another `-----BEGIN ` line appears
///   before any `-----END` line, the unterminated block is dropped and parsing
///   restarts at that new header, so a truncated block does not take the
///   following valid block down with it.
///
/// The label on the `-----END` line is not compared with the BEGIN label.
pub fn pem_blocks(pem: &[u8]) -> Vec<(String, Vec<u8>)> {
    // Work at the byte level throughout so that non-UTF-8 content in surrounding
    // text (e.g. OpenSSL "Bag Attributes" with Latin-1 friendly names) does not
    // prevent valid PEM blocks from being parsed.  Only the label between
    // "-----BEGIN " and "-----" is required to be valid UTF-8.
    const BEGIN: &[u8] = b"-----BEGIN ";
    const END: &[u8] = b"-----END";
    const DASHES: &[u8] = b"-----";

    let mut results = Vec::new();
    let mut pos = 0;

    'scan: while pos < pem.len() {
        // Find the next "-----BEGIN " marker anywhere in the remaining input.
        let Some(begin_rel) = pem[pos..].windows(BEGIN.len()).position(|w| w == BEGIN) else {
            break;
        };
        let begin_abs = pos + begin_rel;

        // Find the end of the BEGIN header line.
        let Some(nl_rel) = pem[begin_abs..].iter().position(|&b| b == b'\n') else {
            break; // truncated: no newline after BEGIN, so there can be no body
        };
        let header_end = begin_abs + nl_rel + 1;

        // Extract the label: bytes between "-----BEGIN " and the closing "-----".
        let header_line = &pem[begin_abs..begin_abs + nl_rel]; // excludes the \n
        let header_line = header_line.strip_suffix(b"\r").unwrap_or(header_line);
        let label = header_line
            .strip_prefix(BEGIN)
            .and_then(|rest| rest.strip_suffix(DASHES))
            .and_then(|bytes| core::str::from_utf8(bytes).ok());
        let Some(label) = label else {
            // Not a well-formed header; keep scanning after this line.
            pos = header_end;
            continue;
        };

        // Collect base64 body lines until the END marker.
        let mut b64: Vec<u8> = Vec::with_capacity(256);
        let mut line_start = header_end;

        loop {
            let line_end = pem[line_start..]
                .iter()
                .position(|&b| b == b'\n')
                .map_or(pem.len(), |i| line_start + i);
            let line = &pem[line_start..line_end];

            if line.starts_with(END) {
                // Resume after the END line (or at EOF if it had no newline).
                pos = (line_end + 1).min(pem.len());
                break;
            }

            if line.starts_with(BEGIN) {
                // The current block was never terminated.  Drop it and parse
                // this line as a fresh header instead of feeding it to the
                // base64 decoder, which would also discard the next block.
                pos = line_start;
                continue 'scan;
            }

            // Dropping ASCII whitespace also removes the CR of CRLF endings.
            b64.extend(line.iter().copied().filter(|b| !b.is_ascii_whitespace()));

            if line_end >= pem.len() {
                break 'scan; // reached EOF with no END marker: nothing left to parse
            }
            line_start = line_end + 1;
        }

        // Allocate the owned label only for blocks that actually decode.
        if let Some(der) = decode_base64(&b64) {
            results.push((label.to_owned(), der));
        }
    }

    results
}

/// Dependency-free PEM → DER decoder that discards block labels.
///
/// Runs [`pem_blocks`] over `pem` and keeps only the decoded bytes of each
/// block.  Call [`pem_blocks`] directly when the label matters (e.g. to tell
/// a `CERTIFICATE` apart from a `PRIVATE KEY`).
///
/// # Returns
///
/// The DER bytes of every block that [`pem_blocks`] yields, in input order.
/// The result is empty when no block could be decoded; blocks that
/// [`pem_blocks`] skips (bad base64, no `-----END` marker) contribute nothing.
pub fn pem_to_der(pem: &[u8]) -> Vec<Vec<u8>> {
    let blocks = pem_blocks(pem);
    let mut ders = Vec::with_capacity(blocks.len());
    for (_label, der) in blocks {
        ders.push(der);
    }
    ders
}

/// Encode DER bytes as a single PEM block with the given `label`.
///
/// Output uses 64-character base64 lines and Unix newlines (`\n`), following
/// RFC 7468 (Textual Encodings of PKIX Structures).  The result is
/// `-----BEGIN <label>-----\n`, the wrapped base64 body (each line terminated
/// by `\n`), then `-----END <label>-----\n`.  Empty `der` yields just the two
/// boundary lines.
///
/// `label` is written verbatim and is not validated.
///
/// Common labels: `"CERTIFICATE"`, `"CERTIFICATE REQUEST"`, `"X509 CRL"`,
/// `"OCSP RESPONSE"`.
pub fn der_to_pem(label: &str, der: &[u8]) -> Vec<u8> {
    const BEGIN: &[u8] = b"-----BEGIN ";
    const END: &[u8] = b"-----END ";
    const DASHES_NL: &[u8] = b"-----\n";
    const LINE_WIDTH: usize = 64;

    let b64 = encode_base64(der);

    // Exact output size, so the buffer never reallocates: every body line
    // (including a short final one) carries a newline, hence `div_ceil`.
    let body_len = b64.len() + b64.len().div_ceil(LINE_WIDTH);
    let boundary_len = BEGIN.len() + END.len() + 2 * (label.len() + DASHES_NL.len());
    let mut out = Vec::with_capacity(boundary_len + body_len);

    out.extend_from_slice(BEGIN);
    out.extend_from_slice(label.as_bytes());
    out.extend_from_slice(DASHES_NL);
    for line in b64.as_bytes().chunks(LINE_WIDTH) {
        out.extend_from_slice(line);
        out.push(b'\n');
    }
    out.extend_from_slice(END);
    out.extend_from_slice(label.as_bytes());
    out.extend_from_slice(DASHES_NL);
    out
}

/// Standard-alphabet base64 encoder.
///
/// Encodes `data` with the `A–Z a–z 0–9 + /` alphabet and pads the output
/// with `=` to a multiple of four characters.  No line breaks are inserted.
pub fn encode_base64(data: &[u8]) -> String {
    const TABLE: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    // Map the low six bits of `bits` to the corresponding alphabet character.
    let sextet = |bits: u32| TABLE[(bits & 0x3f) as usize] as char;

    let mut encoded = String::with_capacity(data.len().div_ceil(3) * 4);
    for group in data.chunks(3) {
        // Pack up to three bytes into a 24-bit word; missing bytes count as 0.
        let hi = u32::from(group[0]);
        let mid = group.get(1).copied().map_or(0, u32::from);
        let lo = group.get(2).copied().map_or(0, u32::from);
        let word = hi << 16 | mid << 8 | lo;

        // The first two characters are always present.
        encoded.push(sextet(word >> 18));
        encoded.push(sextet(word >> 12));

        // The remaining two are data or padding, depending on the group size.
        match group.len() {
            1 => encoded.push_str("=="),
            2 => {
                encoded.push(sextet(word >> 6));
                encoded.push('=');
            }
            _ => {
                encoded.push(sextet(word >> 6));
                encoded.push(sextet(word));
            }
        }
    }
    encoded
}

/// Standard-alphabet base64 decoder (input must be padded to a multiple of
/// 4 bytes, as required by valid PEM).
///
/// Returns `None` when:
///
/// * the input length is not a multiple of four;
/// * any byte is outside the `A–Z a–z 0–9 + /` alphabet (whitespace and line
///   breaks included, so callers must strip them first);
/// * `=` padding appears anywhere other than the last one or two positions of
///   the final four-byte group.
///
/// Empty input decodes to an empty `Vec`.  Unused low bits in a padded final
/// group are ignored rather than required to be zero.
pub fn decode_base64(input: &[u8]) -> Option<Vec<u8>> {
    /// Map one alphabet byte to its 6-bit value.
    #[inline]
    fn val(b: u8) -> Option<u8> {
        match b {
            b'A'..=b'Z' => Some(b - b'A'),
            b'a'..=b'z' => Some(b - b'a' + 26),
            b'0'..=b'9' => Some(b - b'0' + 52),
            b'+' => Some(62),
            b'/' => Some(63),
            _ => None,
        }
    }

    let mut quads = input.chunks_exact(4);
    if !quads.remainder().is_empty() {
        return None; // length is not a multiple of four
    }

    let mut out = Vec::with_capacity(input.len() / 4 * 3);

    // Padding is legal only in the final group, so peel that one off and
    // decode it separately; empty input has no groups at all.
    let Some(last) = quads.next_back() else {
        return Some(out);
    };

    // Every earlier group must be four plain alphabet characters; a '=' here
    // fails `val` and rejects the whole input.
    for quad in quads {
        match *quad {
            [a, b, c, d] => {
                let (av, bv, cv, dv) = (val(a)?, val(b)?, val(c)?, val(d)?);
                out.push((av << 2) | (bv >> 4));
                out.push((bv << 4) | (cv >> 2));
                out.push((cv << 6) | dv);
            }
            _ => return None,
        }
    }

    match *last {
        [a, b, b'=', b'='] => {
            let (av, bv) = (val(a)?, val(b)?);
            out.push((av << 2) | (bv >> 4));
        }
        [a, b, c, b'='] => {
            let (av, bv, cv) = (val(a)?, val(b)?, val(c)?);
            out.push((av << 2) | (bv >> 4));
            out.push((bv << 4) | (cv >> 2));
        }
        [a, b, c, d] => {
            let (av, bv, cv, dv) = (val(a)?, val(b)?, val(c)?, val(d)?);
            out.push((av << 2) | (bv >> 4));
            out.push((bv << 4) | (cv >> 2));
            out.push((cv << 6) | dv);
        }
        _ => return None,
    }

    Some(out)
}

#[cfg(test)]
mod tests {
    //! Unit tests for the PEM and base64 helpers.
    //!
    //! Fixtures use the base64 vectors "Man" ↔ "TWFu", "Ma" ↔ "TWE=" and
    //! "M" ↔ "TQ==", which cover zero, one and two padding characters.

    use super::*;

    /// Assert that `raw` encodes to `b64` and that `b64` decodes back to `raw`.
    fn assert_round_trip(raw: &[u8], b64: &str) {
        assert_eq!(encode_base64(raw), b64);
        assert_eq!(decode_base64(b64.as_bytes()), Some(raw.to_vec()));
    }

    // ---- pem_blocks -------------------------------------------------------

    #[test]
    fn pem_blocks_returns_label() {
        let pem = b"-----BEGIN CERTIFICATE-----\nTWFu\n-----END CERTIFICATE-----\n";
        assert_eq!(
            pem_blocks(pem),
            vec![("CERTIFICATE".to_string(), b"Man".to_vec())]
        );
    }

    #[test]
    fn pem_blocks_mixed_types() {
        let pem = b"-----BEGIN CERTIFICATE-----\n\
                    TWFu\n\
                    -----END CERTIFICATE-----\n\
                    -----BEGIN PRIVATE KEY-----\n\
                    TWE=\n\
                    -----END PRIVATE KEY-----\n";
        assert_eq!(
            pem_blocks(pem),
            vec![
                ("CERTIFICATE".to_string(), b"Man".to_vec()),
                ("PRIVATE KEY".to_string(), b"Ma".to_vec()),
            ]
        );
    }

    #[test]
    fn pem_blocks_empty_on_no_block() {
        assert!(pem_blocks(b"not a pem file").is_empty());
    }

    #[test]
    fn pem_blocks_skips_block_without_end() {
        // A header and body with no END line must not produce an entry.
        assert!(pem_blocks(b"-----BEGIN CERTIFICATE-----\nTWFu\n").is_empty());
    }

    #[test]
    fn pem_blocks_skips_malformed_base64() {
        // '!' is outside the base64 alphabet: the first block is dropped, the
        // second one is still returned.
        let pem = b"-----BEGIN CERTIFICATE-----\n\
                    TW!u\n\
                    -----END CERTIFICATE-----\n\
                    -----BEGIN CERTIFICATE-----\n\
                    TWFu\n\
                    -----END CERTIFICATE-----\n";
        assert_eq!(
            pem_blocks(pem),
            vec![("CERTIFICATE".to_string(), b"Man".to_vec())]
        );
    }

    // ---- base64 -----------------------------------------------------------

    #[test]
    fn round_trip_short() {
        // "Man" ↔ "TWFu" (no padding)
        assert_round_trip(b"Man", "TWFu");
    }

    #[test]
    fn round_trip_one_pad() {
        // "Ma" ↔ "TWE=" (one padding character)
        assert_round_trip(b"Ma", "TWE=");
    }

    #[test]
    fn round_trip_two_pad() {
        // "M" ↔ "TQ==" (two padding characters)
        assert_round_trip(b"M", "TQ==");
    }

    #[test]
    fn reject_bad_length() {
        assert!(decode_base64(b"TWF").is_none());
    }

    #[test]
    fn encode_base64_no_padding() {
        assert_eq!(encode_base64(b"Man"), "TWFu");
    }

    #[test]
    fn encode_base64_one_pad() {
        assert_eq!(encode_base64(b"Ma"), "TWE=");
    }

    #[test]
    fn encode_base64_two_pad() {
        assert_eq!(encode_base64(b"M"), "TQ==");
    }

    // ---- pem_to_der -------------------------------------------------------

    #[test]
    fn pem_strips_headers_single() {
        let pem = b"-----BEGIN CERTIFICATE-----\nTWFu\n-----END CERTIFICATE-----\n";
        assert_eq!(pem_to_der(pem), vec![b"Man".to_vec()]);
    }

    #[test]
    fn pem_crlf() {
        let pem = b"-----BEGIN CERTIFICATE-----\r\nTWFu\r\n-----END CERTIFICATE-----\r\n";
        assert_eq!(pem_to_der(pem), vec![b"Man".to_vec()]);
    }

    #[test]
    fn pem_no_begin_returns_empty() {
        assert!(pem_to_der(b"not a pem file").is_empty());
    }

    #[test]
    fn pem_multiple_blocks() {
        let pem = b"-----BEGIN CERTIFICATE-----\n\
                    TWFu\n\
                    -----END CERTIFICATE-----\n\
                    -----BEGIN CERTIFICATE-----\n\
                    TWE=\n\
                    -----END CERTIFICATE-----\n\
                    -----BEGIN CERTIFICATE-----\n\
                    TQ==\n\
                    -----END CERTIFICATE-----\n";
        assert_eq!(
            pem_to_der(pem),
            vec![b"Man".to_vec(), b"Ma".to_vec(), b"M".to_vec()]
        );
    }

    #[test]
    fn pem_multiple_types() {
        let pem = b"-----BEGIN CERTIFICATE-----\n\
                    TWFu\n\
                    -----END CERTIFICATE-----\n\
                    -----BEGIN PRIVATE KEY-----\n\
                    TWE=\n\
                    -----END PRIVATE KEY-----\n";
        assert_eq!(pem_to_der(pem), vec![b"Man".to_vec(), b"Ma".to_vec()]);
    }

    #[test]
    fn pem_with_text_between_blocks() {
        // Some PEM files include human-readable header lines between blocks.
        let pem = b"subject=CN=foo\n\
                    issuer=CN=bar\n\
                    -----BEGIN CERTIFICATE-----\n\
                    TWFu\n\
                    -----END CERTIFICATE-----\n\
                    subject=CN=bar\n\
                    -----BEGIN CERTIFICATE-----\n\
                    TWE=\n\
                    -----END CERTIFICATE-----\n";
        assert_eq!(pem_to_der(pem), vec![b"Man".to_vec(), b"Ma".to_vec()]);
    }

    #[test]
    fn pem_with_non_utf8_bag_attributes() {
        // OpenSSL PKCS#12 export includes "Bag Attributes" sections where the
        // friendlyName value may contain raw non-UTF-8 bytes (e.g. Latin-1
        // encoded characters like \xf1 for ñ).  pem_blocks must still extract
        // the PEM blocks correctly despite the invalid UTF-8 surrounding text.
        let chunks: [&[u8]; 11] = [
            b"Bag Attributes\n",
            b"    friendlyName: Espa\xf1a\n", // \xf1 is invalid UTF-8
            b"Key Attributes: <No Attributes>\n",
            b"-----BEGIN CERTIFICATE-----\n",
            b"TWFu\n",
            b"-----END CERTIFICATE-----\n",
            b"Bag Attributes\n",
            b"    friendlyName: caf\xe9\n", // \xe9 is invalid UTF-8
            b"-----BEGIN PRIVATE KEY-----\n",
            b"TWE=\n",
            b"-----END PRIVATE KEY-----\n",
        ];
        let pem: Vec<u8> = chunks.concat();

        let blocks = pem_blocks(&pem);
        assert_eq!(blocks.len(), 2);
        assert_eq!(blocks[0], ("CERTIFICATE".to_string(), b"Man".to_vec()));
        assert_eq!(blocks[1], ("PRIVATE KEY".to_string(), b"Ma".to_vec()));
    }

    // ---- der_to_pem -------------------------------------------------------

    #[test]
    fn der_to_pem_round_trip() {
        let der = b"Man";
        let pem = der_to_pem("CERTIFICATE", der);
        assert!(pem.starts_with(b"-----BEGIN CERTIFICATE-----\n"));
        assert!(pem.ends_with(b"-----END CERTIFICATE-----\n"));
        assert_eq!(pem_to_der(&pem), vec![der.to_vec()]);
    }

    #[test]
    fn der_to_pem_line_wrap() {
        // 49 raw bytes encode to ceil(49 / 3) * 4 = 68 base64 characters,
        // which must wrap into one full 64-character line plus a
        // 4-character remainder line.
        let der = vec![0u8; 49];
        let pem = der_to_pem("TEST", &der);
        let body: Vec<&str> = core::str::from_utf8(&pem)
            .unwrap()
            .lines()
            .filter(|l| !l.starts_with("-----"))
            .collect();
        assert_eq!(body.len(), 2, "expected 2 body lines, got: {body:?}");
        assert_eq!(body[0].len(), 64);
        assert_eq!(body[1].len(), 4);
    }
}