use alloc::{borrow::ToOwned, format, string::String, vec::Vec};
use base32::Alphabet;
use blake2_rfc::blake2b::blake2b;
use core::{fmt, str::FromStr};
use sha2::{Digest as _, Sha256};
use sha3::Keccak256;
/// Content identifier (CID), stored in its binary representation.
///
/// The wrapped bytes are checked with `decode_cid` when going through [`Cid::from_bytes`],
/// and [`Cid::prefix`] relies on them always decoding successfully.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Cid(Vec<u8>);
impl Cid {
pub fn from_bytes(bytes: Vec<u8>) -> Result<Self, ParseError> {
let _ = decode_cid(&bytes)?;
Ok(Cid(bytes))
}
pub fn prefix(&self) -> CidPrefix {
let decoded = decode_cid(&self.0).expect("Cid is always valid; qed");
let prefix_len = self.0.len() - decoded.digest.len();
CidPrefix(self.0[..prefix_len].to_vec())
}
}
impl FromStr for Cid {
    type Err = ParseError;

    /// Parses a CID from its multibase base32 textual form.
    ///
    /// The string is first decoded with `multibase_base32_decode`, then the resulting bytes
    /// go through the same validation as [`Cid::from_bytes`].
    fn from_str(s: &str) -> Result<Self, ParseError> {
        multibase_base32_decode(s).and_then(Self::from_bytes)
    }
}
impl AsRef<[u8]> for Cid {
    /// Gives access to the binary representation of the CID.
    fn as_ref(&self) -> &[u8] {
        self.0.as_slice()
    }
}
impl fmt::Display for Cid {
    /// Writes the CID as a multibase string: `b` followed by the lowercase, unpadded
    /// RFC 4648 base32 encoding of its bytes.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let encoded = multibase_base32_encode(self.0.as_slice());
        f.write_str(encoded.as_str())
    }
}
impl fmt::Debug for Cid {
    /// Debug output is deliberately the same as the [`fmt::Display`] output, rather than a
    /// dump of the raw bytes.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <Self as fmt::Display>::fmt(self, f)
    }
}
/// Leading part of a CID, i.e. everything that precedes the digest bytes.
///
/// The wrapped bytes are checked with `decode_cid_prefix` when going through
/// [`CidPrefix::from_bytes`], and [`CidPrefix::multihash_type`] relies on them always
/// decoding successfully.
///
/// `Debug` is derived so that the type can be used in `{:?}` formatting, `assert_eq!`, and in
/// derived `Debug` implementations of types that contain it. The output is the raw byte list.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct CidPrefix(Vec<u8>);
impl CidPrefix {
pub fn from_bytes(bytes: Vec<u8>) -> Result<Self, ParseError> {
let _ = decode_cid_prefix(&bytes)?;
Ok(CidPrefix(bytes))
}
pub fn multihash_type(&self) -> MultihashType {
decode_cid_prefix(&self.0)
.expect("CidPrefix is always valid; qed")
.mh_type
}
pub fn with_digest(self, digest: &[u8; 32]) -> Cid {
let mut bytes = self.0;
bytes.extend_from_slice(digest);
Cid(bytes)
}
pub fn with_digest_of(self, bytes: &[u8]) -> Cid {
let digest = self.multihash_type().digest(bytes);
self.with_digest(&digest)
}
}
/// Hash functions that can be used for the digest of a CID.
///
/// Each discriminant is the multihash code that `MultihashType::from_code` maps to the
/// corresponding variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MultihashType {
/// Hashed with `Sha256` (see `MultihashType::digest`).
Sha2_256 = 0x12,
/// Hashed with `Keccak256` (see `MultihashType::digest`).
Keccak256 = 0x1b,
/// Hashed with `blake2b` configured for a 32-byte output (see `MultihashType::digest`).
Blake2b256 = 0xb220,
}
impl MultihashType {
    /// Maps a multihash code to the corresponding hash function.
    ///
    /// Returns `None` if the code isn't the discriminant of any variant of this enum.
    pub fn from_code(code: u64) -> Option<Self> {
        const SUPPORTED: [MultihashType; 3] = [
            MultihashType::Sha2_256,
            MultihashType::Keccak256,
            MultihashType::Blake2b256,
        ];

        // The discriminant of each variant is its multihash code.
        SUPPORTED.iter().copied().find(|ty| *ty as u64 == code)
    }

    /// Hashes `bytes` with this hash function and returns the 32-byte digest.
    pub fn digest(&self, bytes: &[u8]) -> [u8; 32] {
        match *self {
            Self::Sha2_256 => Sha256::digest(bytes).into(),
            Self::Keccak256 => Keccak256::digest(bytes).into(),
            Self::Blake2b256 => {
                // 32-byte output, no key.
                let hash = blake2b(32, &[], bytes);
                let owned: Vec<u8> = hash.as_bytes().to_owned();
                owned
                    .try_into()
                    .expect("correct size passed to constructor; qed")
            }
        }
    }

    /// Returns the size in bytes of the digests produced by this hash function.
    ///
    /// Every supported hash function currently produces 32 bytes.
    pub fn digest_size(&self) -> usize {
        match *self {
            Self::Sha2_256 | Self::Keccak256 | Self::Blake2b256 => 32,
        }
    }
}
/// Error returned when parsing a [`Cid`] or a [`CidPrefix`] fails.
#[derive(Debug, derive_more::Display, derive_more::Error, Clone)]
pub enum ParseError {
    /// The parser rejected the bytes, for example because they are truncated or followed by
    /// unexpected trailing bytes.
    #[display("The CID binary representation is invalid.")]
    DecodeError,
    /// The string doesn't start with the `b` multibase prefix.
    #[display("Unsupported multibase codec. Only base32 is supported.")]
    UnsupportedMultibase,
    /// The part of the string after the multibase prefix couldn't be decoded as base32.
    #[display("Invalid characters in base32 string.")]
    InvalidMultibase,
    /// The version field of the CID isn't `1`.
    #[display("Unsupported CID version. Only CIDv1 is supported.")]
    UnsupportedCidVersion,
    /// The multihash code isn't one accepted by [`MultihashType::from_code`].
    // Keccak-256 is accepted by `MultihashType::from_code` as well, so it is listed here.
    #[display(
        "Unsupported multihash code. Only sha2-256, keccak-256 & blake2b-256 are supported."
    )]
    UnsupportedMultihash,
    /// The digest length isn't 32 bytes. Contains the length that was found.
    #[display(
        "Invalid multihash digest size of {_0} bytes. Must be 32 bytes for sha2-256 / keccak-256 / blake2b-256."
    )]
    InvalidDigestSize(#[error(not(source))] usize),
}
/// Decodes a multibase string that uses the `b` prefix (lowercase, unpadded RFC 4648 base32).
///
/// # Errors
///
/// - [`ParseError::UnsupportedMultibase`] if the string doesn't start with `b`.
/// - [`ParseError::InvalidMultibase`] if the rest of the string can't be decoded as base32.
fn multibase_base32_decode(string: &str) -> Result<Vec<u8>, ParseError> {
    match string.strip_prefix('b') {
        Some(payload) => base32::decode(Alphabet::Rfc4648Lower { padding: false }, payload)
            .ok_or(ParseError::InvalidMultibase),
        None => Err(ParseError::UnsupportedMultibase),
    }
}
/// Encodes `data` as a multibase string: the `b` prefix followed by the lowercase, unpadded
/// RFC 4648 base32 encoding of the bytes.
fn multibase_base32_encode(data: &[u8]) -> String {
    let encoded = base32::encode(Alphabet::Rfc4648Lower { padding: false }, data);
    format!("b{}", encoded)
}
/// Fields extracted by `decode_cid` from the binary representation of a CID.
#[derive(Debug)]
// `codec` is only read by the tests of this file, which is presumably why the unused lint is
// silenced here.
#[allow(unused)]
struct DecodedCid<'a> {
/// Second integer of the CID, right after the version.
codec: u64,
/// Hash function identified by the third integer of the CID.
mh_type: MultihashType,
/// The 32 digest bytes, borrowed from the buffer that was decoded.
digest: &'a [u8; 32],
}
/// Decodes and validates the binary representation of a full CID.
///
/// The whole of `bytes` must be consumed by the parser; the returned digest borrows from it.
///
/// # Errors
///
/// Checks are performed in this order:
///
/// - [`ParseError::DecodeError`] if the parser rejects the bytes or doesn't consume them all.
/// - [`ParseError::UnsupportedCidVersion`] if the version isn't `1`.
/// - [`ParseError::UnsupportedMultihash`] if the multihash code is unknown.
/// - [`ParseError::InvalidDigestSize`] if the digest isn't exactly 32 bytes long.
fn decode_cid<'a>(bytes: &'a [u8]) -> Result<DecodedCid<'a>, ParseError> {
    let (_rest, (version, codec, mh_code, raw_digest)) = nom::Parser::parse(
        &mut nom::combinator::all_consuming(parse_cid::<nom::error::Error<&[u8]>>),
        bytes,
    )
    .map_err(|_| ParseError::DecodeError)?;
    // Guaranteed by `all_consuming`.
    debug_assert!(_rest.is_empty());

    if version != 1 {
        return Err(ParseError::UnsupportedCidVersion);
    }

    let Some(mh_type) = MultihashType::from_code(mh_code) else {
        return Err(ParseError::UnsupportedMultihash);
    };

    let Ok(digest) = <&[u8; 32]>::try_from(raw_digest) else {
        return Err(ParseError::InvalidDigestSize(raw_digest.len()));
    };

    Ok(DecodedCid {
        codec,
        mh_type,
        digest,
    })
}
/// Fields extracted by `decode_cid_prefix` from the binary representation of a CID prefix.
#[derive(Debug)]
// `codec` is stored but not read anywhere in this file, which is presumably why the unused
// lint is silenced here.
#[allow(unused)]
struct DecodedCidPrefix {
/// Second integer of the prefix, right after the version.
codec: u64,
/// Hash function identified by the third integer of the prefix.
mh_type: MultihashType,
}
/// Decodes and validates the binary representation of a CID prefix, i.e. a CID without its
/// digest bytes.
///
/// The whole of `bytes` must be consumed by the parser.
///
/// # Errors
///
/// Checks are performed in this order:
///
/// - [`ParseError::DecodeError`] if the parser rejects the bytes or doesn't consume them all.
/// - [`ParseError::UnsupportedCidVersion`] if the version isn't `1`.
/// - [`ParseError::UnsupportedMultihash`] if the multihash code is unknown.
/// - [`ParseError::InvalidDigestSize`] if the announced digest length doesn't match
///   [`MultihashType::digest_size`].
fn decode_cid_prefix(bytes: &[u8]) -> Result<DecodedCidPrefix, ParseError> {
    let (_rest, (version, codec, mh_code, announced_len)) = nom::Parser::parse(
        &mut nom::combinator::all_consuming(parse_cid_prefix::<nom::error::Error<&[u8]>>),
        bytes,
    )
    .map_err(|_| ParseError::DecodeError)?;
    // Guaranteed by `all_consuming`.
    debug_assert!(_rest.is_empty());

    if version != 1 {
        return Err(ParseError::UnsupportedCidVersion);
    }

    let Some(mh_type) = MultihashType::from_code(mh_code) else {
        return Err(ParseError::UnsupportedMultihash);
    };

    if announced_len != mh_type.digest_size() {
        return Err(ParseError::InvalidDigestSize(announced_len));
    }

    Ok(DecodedCidPrefix { codec, mh_type })
}
/// nom parser for the raw fields of a full CID.
///
/// Yields, in order: the version, the codec, the multihash code, and the digest bytes. The
/// digest is read as a length-prefixed slice. No validation of the values is performed here.
fn parse_cid<'a, E: nom::error::ParseError<&'a [u8]>>(
    bytes: &'a [u8],
) -> nom::IResult<&'a [u8], (u64, u64, u64, &'a [u8]), E> {
    use crate::util::leb128::{nom_leb128_u64, nom_leb128_usize};
    use nom::multi::length_data;

    nom::Parser::parse(
        &mut (
            // CID version.
            nom_leb128_u64,
            // Codec.
            nom_leb128_u64,
            // Multihash code.
            nom_leb128_u64,
            // Digest length followed by that many digest bytes.
            length_data(nom_leb128_usize),
        ),
        bytes,
    )
}
/// nom parser for the raw fields of a CID prefix.
///
/// Yields, in order: the version, the codec, the multihash code, and the announced digest
/// length. The digest bytes themselves are not part of a prefix. No validation of the values
/// is performed here.
fn parse_cid_prefix<'a, E: nom::error::ParseError<&'a [u8]>>(
    bytes: &'a [u8],
) -> nom::IResult<&'a [u8], (u64, u64, u64, usize), E> {
    use crate::util::leb128::{nom_leb128_u64, nom_leb128_usize};

    nom::Parser::parse(
        &mut (
            // CID version.
            nom_leb128_u64,
            // Codec.
            nom_leb128_u64,
            // Multihash code.
            nom_leb128_u64,
            // Announced digest length.
            nom_leb128_usize,
        ),
        bytes,
    )
}
#[cfg(test)]
mod tests {
    // Rejection tests assert the exact `ParseError` variant rather than just `is_err()`, so
    // that a change in which check fails first is caught.
    use super::*;

    /// Version 1, codec 0x55, sha2-256 (0x12), 32-byte digest length.
    fn sample_prefix_bytes() -> Vec<u8> {
        vec![0x01, 0x55, 0x12, 0x20]
    }

    /// `sample_prefix_bytes` followed by an all-zero 32-byte digest.
    fn sample_cid_bytes() -> Vec<u8> {
        let mut bytes = sample_prefix_bytes();
        bytes.extend_from_slice(&[0u8; 32]);
        bytes
    }

    #[test]
    fn cid_from_bytes_valid() {
        assert!(Cid::from_bytes(sample_cid_bytes()).is_ok());
    }

    #[test]
    fn cid_from_bytes_rejects_prefix_only() {
        assert!(matches!(
            Cid::from_bytes(sample_prefix_bytes()),
            Err(ParseError::DecodeError)
        ));
    }

    #[test]
    fn cid_from_bytes_rejects_empty() {
        assert!(matches!(
            Cid::from_bytes(vec![]),
            Err(ParseError::DecodeError)
        ));
    }

    #[test]
    fn cid_from_bytes_rejects_truncated() {
        assert!(matches!(
            Cid::from_bytes(vec![0x01, 0x55]),
            Err(ParseError::DecodeError)
        ));
    }

    #[test]
    fn cid_from_bytes_rejects_unsupported_version() {
        let mut bytes = vec![0x02, 0x55, 0x12, 0x20];
        bytes.extend_from_slice(&[0u8; 32]);
        assert!(matches!(
            Cid::from_bytes(bytes),
            Err(ParseError::UnsupportedCidVersion)
        ));
    }

    #[test]
    fn cid_from_bytes_rejects_unsupported_multihash() {
        let mut bytes = vec![0x01, 0x55, 0x00, 0x20];
        bytes.extend_from_slice(&[0u8; 32]);
        assert!(matches!(
            Cid::from_bytes(bytes),
            Err(ParseError::UnsupportedMultihash)
        ));
    }

    #[test]
    fn cid_from_bytes_rejects_wrong_digest_size() {
        let mut bytes = vec![0x01, 0x55, 0x12, 0x10];
        bytes.extend_from_slice(&[0u8; 16]);
        assert!(matches!(
            Cid::from_bytes(bytes),
            Err(ParseError::InvalidDigestSize(16))
        ));
    }

    #[test]
    fn cid_from_bytes_rejects_trailing_bytes() {
        let mut bytes = sample_cid_bytes();
        bytes.push(0xff);
        assert!(matches!(
            Cid::from_bytes(bytes),
            Err(ParseError::DecodeError)
        ));
    }

    #[test]
    fn cid_from_str() {
        let cid = Cid::from_str("bafk2bzacecjiwibwnfb6fl6rd26a5lrokoutx4lxut6pgw6mmtkqg4comxrae")
            .unwrap();
        let decoded = decode_cid(cid.as_ref()).unwrap();
        assert_eq!(decoded.codec, 0x55);
        assert_eq!(decoded.mh_type, MultihashType::Blake2b256);
        assert_eq!(
            Vec::from(decoded.digest),
            hex::decode("928b20366943e2afd11ebc0eae2e53a93bf177a4fcf35bcc64d503704e65e202")
                .unwrap()
        );
    }

    #[test]
    fn cid_from_str_rejects_missing_multibase_prefix() {
        assert!(matches!(
            Cid::from_str(""),
            Err(ParseError::UnsupportedMultibase)
        ));
    }

    #[test]
    fn cid_prefix_from_bytes_valid() {
        let prefix = CidPrefix::from_bytes(sample_prefix_bytes()).unwrap();
        assert_eq!(prefix.multihash_type(), MultihashType::Sha2_256);
    }

    #[test]
    fn cid_prefix_from_bytes_rejects_full_cid() {
        assert!(matches!(
            CidPrefix::from_bytes(sample_cid_bytes()),
            Err(ParseError::DecodeError)
        ));
    }

    #[test]
    fn cid_prefix_from_bytes_rejects_empty() {
        assert!(matches!(
            CidPrefix::from_bytes(vec![]),
            Err(ParseError::DecodeError)
        ));
    }

    #[test]
    fn cid_prefix_from_bytes_rejects_truncated() {
        assert!(matches!(
            CidPrefix::from_bytes(vec![0x01, 0x55]),
            Err(ParseError::DecodeError)
        ));
    }

    #[test]
    fn cid_prefix_from_bytes_rejects_unsupported_version() {
        assert!(matches!(
            CidPrefix::from_bytes(vec![0x02, 0x55, 0x12, 0x20]),
            Err(ParseError::UnsupportedCidVersion)
        ));
    }

    #[test]
    fn cid_prefix_from_bytes_rejects_unsupported_multihash() {
        assert!(matches!(
            CidPrefix::from_bytes(vec![0x01, 0x55, 0x00, 0x20]),
            Err(ParseError::UnsupportedMultihash)
        ));
    }

    #[test]
    fn cid_prefix_from_bytes_rejects_wrong_digest_size() {
        assert!(matches!(
            CidPrefix::from_bytes(vec![0x01, 0x55, 0x12, 0x10]),
            Err(ParseError::InvalidDigestSize(16))
        ));
    }

    #[test]
    fn cid_prefix_from_cid_roundtrip() {
        let cid = Cid::from_bytes(sample_cid_bytes()).unwrap();
        let prefix = cid.prefix();
        assert_eq!(prefix.0, sample_prefix_bytes());
        assert_eq!(prefix.multihash_type(), MultihashType::Sha2_256);
        assert_eq!(prefix.with_digest(&[0; 32]), cid);
    }

    #[test]
    fn cid_prefix_keccak256() {
        let prefix = CidPrefix::from_bytes(vec![0x01, 0x55, 0x1b, 0x20]).unwrap();
        assert_eq!(prefix.multihash_type(), MultihashType::Keccak256);
    }

    #[test]
    fn cid_prefix_blake2b256() {
        // 0xb220 takes three bytes (0xa0, 0xe4, 0x02) once variable-length encoded.
        let prefix_bytes: Vec<u8> = vec![0x01, 0x55, 0xa0, 0xe4, 0x02, 0x20];
        let prefix = CidPrefix::from_bytes(prefix_bytes).unwrap();
        assert_eq!(prefix.multihash_type(), MultihashType::Blake2b256);
    }

    #[test]
    fn cid_prefix_from_cid_blake2b256() {
        let mut cid_bytes: Vec<u8> = vec![0x01, 0x55, 0xa0, 0xe4, 0x02, 0x20];
        cid_bytes.extend_from_slice(&[0u8; 32]);
        let cid = Cid::from_bytes(cid_bytes).unwrap();
        let prefix = cid.prefix();
        assert_eq!(prefix.multihash_type(), MultihashType::Blake2b256);
        assert_eq!(prefix.0, vec![0x01, 0x55, 0xa0, 0xe4, 0x02, 0x20]);
    }

    #[test]
    fn cid_prefix_with_digest_of_sha2_256() {
        let prefix = CidPrefix::from_bytes(sample_prefix_bytes()).unwrap();
        let cid = prefix.with_digest_of(b"");
        let decoded = decode_cid(cid.as_ref()).unwrap();
        assert_eq!(decoded.codec, 0x55);
        assert_eq!(decoded.mh_type, MultihashType::Sha2_256);
        // SHA-256 of the empty input.
        assert_eq!(
            Vec::from(decoded.digest),
            hex::decode("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855")
                .unwrap()
        );
    }
}