Skip to main content

bee/swarm/
cid.rs

1//! Swarm reference ↔ CID conversion (manifest and feed codecs).
2//!
3//! Mirrors `pkg/swarm/cid.go` in bee-go and `src/utils/cid.ts` in
4//! bee-js. The encoding is base32 (RFC4648, no padding), lowercased,
5//! prefixed with `b` per the multibase spec.
6
7use crate::swarm::errors::Error;
8use crate::swarm::typed_bytes::Reference;
9
/// Multicodec for the Swarm manifest CID type.
///
/// This is the byte placed at index 1 of the CID header when encoding a
/// manifest reference, and the value matched against byte index 1 when
/// decoding.
pub const MANIFEST_CODEC: u8 = 0xfa;
/// Multicodec for the Swarm feed CID type.
///
/// This is the byte placed at index 1 of the CID header when encoding a
/// feed reference, and the value matched against byte index 1 when
/// decoding.
pub const FEED_CODEC: u8 = 0xfb;
14
15/// CID-encoded reference type.
16#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
17pub enum CidType {
18    /// Manifest CID.
19    Manifest,
20    /// Feed CID.
21    Feed,
22}
23
24impl CidType {
25    fn codec(self) -> u8 {
26        match self {
27            CidType::Manifest => MANIFEST_CODEC,
28            CidType::Feed => FEED_CODEC,
29        }
30    }
31
32    fn from_codec(c: u8) -> Result<Self, Error> {
33        match c {
34            MANIFEST_CODEC => Ok(CidType::Manifest),
35            FEED_CODEC => Ok(CidType::Feed),
36            other => Err(Error::argument(format!("unknown CID codec: 0x{other:02x}"))),
37        }
38    }
39}
40
/// Decoded CID payload returned by [`convert_cid_to_reference`].
///
/// Carries both pieces of information a CID string encodes: which kind of
/// content it points at and the reference itself.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct DecodedCid {
    /// The CID type recovered from the multicodec byte (byte index 1 of
    /// the decoded CID).
    pub kind: CidType,
    /// The Swarm reference embedded in the CID (the last 32 decoded bytes).
    pub reference: Reference,
}
49
50/// Encode a 32-byte Swarm reference as a CID string.
51///
52/// Output is `"b" + base32(header) + base32(reference)` lowercased,
53/// where `header = [version=1, codec, _=1, sha256=0x1b, size=32]`.
54pub fn convert_reference_to_cid(reference: &Reference, kind: CidType) -> Result<String, Error> {
55    if reference.is_encrypted() {
56        return Err(Error::argument(
57            "encrypted references (64 bytes) cannot be encoded as a CID",
58        ));
59    }
60    let header: [u8; 5] = [1, kind.codec(), 1, 0x1b, 32];
61    let mut out = String::with_capacity(1 + 8 + 52);
62    out.push('b');
63    out.push_str(&b32_encode(&header).to_ascii_lowercase());
64    out.push_str(&b32_encode(reference.as_bytes()).to_ascii_lowercase());
65    Ok(out)
66}
67
68/// Decode a CID string into its codec type + Swarm reference.
69pub fn convert_cid_to_reference(cid: &str) -> Result<DecodedCid, Error> {
70    let body = cid
71        .strip_prefix('b')
72        .ok_or_else(|| Error::argument("CID must start with multibase prefix 'b'"))?;
73    let bytes = b32_decode(&body.to_ascii_uppercase())?;
74    if bytes.len() < 32 {
75        return Err(Error::argument(format!(
76            "decoded CID too short: {} bytes",
77            bytes.len()
78        )));
79    }
80    // bee-js / bee-go convention: codec is byte index 1; reference is
81    // the last 32 bytes.
82    let kind = CidType::from_codec(bytes[1])?;
83    let ref_bytes = &bytes[bytes.len() - 32..];
84    let reference = Reference::new(ref_bytes)?;
85    Ok(DecodedCid { kind, reference })
86}
87
88// ---- minimal RFC4648 base32 (NoPadding) --------------------------------
89
/// RFC 4648 base32 alphabet; the symbol for value `v` is `B32_ALPHABET[v]`.
const B32_ALPHABET: &[u8; 32] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";

/// Encode `input` as uppercase base32 without `=` padding.
///
/// The input is handled in groups of five bytes, each of which maps to
/// exactly eight symbols. A shorter final group emits only as many symbols
/// as are needed to cover its bits, with the last symbol zero-filled on the
/// right.
fn b32_encode(input: &[u8]) -> String {
    let mut encoded = String::with_capacity(input.len().div_ceil(5) * 8);
    for group in input.chunks(5) {
        // Left-align the group inside a 40-bit window; missing trailing
        // bytes stay zero, which supplies the zero fill for a partial
        // final symbol.
        let mut window: u64 = 0;
        for (pos, &byte) in group.iter().enumerate() {
            window |= u64::from(byte) << (32 - 8 * pos);
        }
        let symbol_count = (group.len() * 8).div_ceil(5);
        for slot in 0..symbol_count {
            let value = ((window >> (35 - 5 * slot)) & 0x1f) as usize;
            encoded.push(char::from(B32_ALPHABET[value]));
        }
    }
    encoded
}
111
112fn b32_decode(input: &str) -> Result<Vec<u8>, Error> {
113    let mut out = Vec::with_capacity(input.len() * 5 / 8);
114    let mut buffer: u64 = 0;
115    let mut bits: u32 = 0;
116    for c in input.chars() {
117        let v: u32 = match c {
118            'A'..='Z' => c as u32 - 'A' as u32,
119            '2'..='7' => 26 + (c as u32 - '2' as u32),
120            other => {
121                return Err(Error::argument(format!(
122                    "invalid base32 character: {other:?}"
123                )));
124            }
125        };
126        buffer = (buffer << 5) | v as u64;
127        bits += 5;
128        if bits >= 8 {
129            bits -= 8;
130            out.push(((buffer >> bits) & 0xff) as u8);
131        }
132    }
133    Ok(out)
134}
135
#[cfg(test)]
mod tests {
    use super::*;

    /// `b` + base32 of the manifest header `[1, 0xfa, 1, 0x1b, 32]`.
    const MANIFEST_PREFIX: &str = "bah5acgza";
    /// `b` + base32 of the feed header `[1, 0xfb, 1, 0x1b, 32]`.
    const FEED_PREFIX: &str = "bah5qcgza";
    /// 1 prefix char + 8 header symbols + 52 reference symbols.
    const CID_LEN: usize = 61;

    #[test]
    fn round_trip_feed() {
        // Same fixture as bee-go's TestCID.
        let hex = "ca6357a08e317d15ec560fef34e4c45f8f19f01c75d6f20a7021602e9575a617";
        let reference = Reference::from_hex(hex).unwrap();
        let cid = convert_reference_to_cid(&reference, CidType::Feed).unwrap();
        // Pin the wire format, not just the round trip: a symmetric bug in
        // the encoder and decoder would otherwise go unnoticed.
        assert!(cid.starts_with(FEED_PREFIX));
        assert_eq!(cid.len(), CID_LEN);
        assert_eq!(cid, cid.to_ascii_lowercase());
        let decoded = convert_cid_to_reference(&cid).unwrap();
        assert_eq!(decoded.kind, CidType::Feed);
        assert_eq!(decoded.reference.to_hex(), hex);
    }

    #[test]
    fn round_trip_manifest() {
        let hex = "f".repeat(64);
        let reference = Reference::from_hex(&hex).unwrap();
        let cid = convert_reference_to_cid(&reference, CidType::Manifest).unwrap();
        // 256 one-bits are 51 full symbols of '7' plus one bit, zero-filled
        // to 0b10000 = 'q'.
        assert_eq!(cid, format!("{MANIFEST_PREFIX}{}q", "7".repeat(51)));
        let decoded = convert_cid_to_reference(&cid).unwrap();
        assert_eq!(decoded.kind, CidType::Manifest);
        assert_eq!(decoded.reference.to_hex(), hex);
    }

    #[test]
    fn decodes_uppercase_body() {
        let reference = Reference::from_hex(&"0".repeat(64)).unwrap();
        let cid = convert_reference_to_cid(&reference, CidType::Manifest).unwrap();
        // Only the multibase prefix is case-sensitive; the body is not.
        let (prefix, body) = cid.split_at(1);
        let shouted = format!("{prefix}{}", body.to_ascii_uppercase());
        assert_eq!(
            convert_cid_to_reference(&shouted).unwrap(),
            convert_cid_to_reference(&cid).unwrap()
        );
    }

    #[test]
    fn rejects_encrypted_reference() {
        let reference = Reference::from_hex(&"a".repeat(128)).unwrap();
        assert!(convert_reference_to_cid(&reference, CidType::Feed).is_err());
    }

    #[test]
    fn rejects_unknown_codec() {
        // Hand-craft a CID with an invalid codec byte.
        let header: [u8; 5] = [1, 0x55, 1, 0x1b, 32];
        let mut bytes = Vec::new();
        bytes.extend_from_slice(&header);
        bytes.extend_from_slice(&[0u8; 32]);
        let cid = format!("b{}", b32_encode(&bytes).to_ascii_lowercase());
        assert!(convert_cid_to_reference(&cid).is_err());
    }

    #[test]
    fn rejects_missing_prefix() {
        assert!(convert_cid_to_reference("acafef00").is_err());
    }

    #[test]
    fn rejects_short_cid() {
        let cid = format!("b{}", b32_encode(&[0u8; 8]).to_ascii_lowercase());
        assert!(convert_cid_to_reference(&cid).is_err());
    }

    #[test]
    fn rejects_invalid_base32_characters() {
        // '0', '1', '8', '9' and punctuation are outside the alphabet.
        assert!(convert_cid_to_reference("b0189").is_err());
        assert!(convert_cid_to_reference("b!!!").is_err());
        assert!(b32_decode("MZXW1").is_err());
        // The raw decoder is uppercase-only; callers fold case first.
        assert!(b32_decode("mzxw6").is_err());
    }

    #[test]
    fn base32_encode_matches_rfc4648_vectors() {
        // RFC 4648 §10 test vectors (no padding).
        assert_eq!(b32_encode(b""), "");
        assert_eq!(b32_encode(b"f"), "MY");
        assert_eq!(b32_encode(b"fo"), "MZXQ");
        assert_eq!(b32_encode(b"foo"), "MZXW6");
        assert_eq!(b32_encode(b"foob"), "MZXW6YQ");
        assert_eq!(b32_encode(b"fooba"), "MZXW6YTB");
        assert_eq!(b32_encode(b"foobar"), "MZXW6YTBOI");
    }

    #[test]
    fn base32_encode_matches_cid_headers() {
        assert_eq!(b32_encode(&[1, MANIFEST_CODEC, 1, 0x1b, 32]), "AH5ACGZA");
        assert_eq!(b32_encode(&[1, FEED_CODEC, 1, 0x1b, 32]), "AH5QCGZA");
    }

    #[test]
    fn base32_decode_matches_rfc4648_vectors() {
        assert_eq!(b32_decode("").unwrap(), b"");
        assert_eq!(b32_decode("MY").unwrap(), b"f");
        assert_eq!(b32_decode("MZXQ").unwrap(), b"fo");
        assert_eq!(b32_decode("MZXW6").unwrap(), b"foo");
        assert_eq!(b32_decode("MZXW6YQ").unwrap(), b"foob");
        assert_eq!(b32_decode("MZXW6YTB").unwrap(), b"fooba");
        assert_eq!(b32_decode("MZXW6YTBOI").unwrap(), b"foobar");
    }
}