Skip to main content

cow_app_data/
cid.rs

1//! IPFS `CIDv1` conversion helpers for `CoW` Protocol app-data.
2//!
3//! Every `CoW` Protocol order's `appData` hash can be mapped to an IPFS
4//! Content Identifier (CID) so that the full JSON document is retrievable
5//! from any IPFS gateway. This module handles the bidirectional conversion
6//! between the 32-byte `appDataHex` stored on-chain and the `CIDv1` string
7//! used by IPFS.
8//!
9//! The modern encoding uses `keccak256` with the `raw` multicodec (`0x55`).
10//! Produced CIDs use multibase base16 lowercase (prefix `f`). Parsing also
11//! accepts multibase base32 lowercase (prefix `b`, RFC 4648 unpadded), which
12//! is the default multibase used by `multiformats`' `CID.parse` when no
13//! explicit decoder is provided — so strings like
14//! `bafkrei...` produced by the `TypeScript` SDK round-trip correctly.
15//! Legacy helpers using `dag-pb` / `sha2-256` are preserved for backwards
16//! compatibility but are deprecated.
17//!
18//! # Key functions
19//!
20//! | Function | Direction |
21//! |---|---|
22//! | [`appdata_hex_to_cid`] | `appDataHex` → `CIDv1` string |
23//! | [`cid_to_appdata_hex`] | `CIDv1` string → `appDataHex` |
24//! | [`parse_cid`] | `CIDv1` string → [`CidComponents`] |
25//! | [`decode_cid`] | raw CID bytes → [`CidComponents`] |
26//! | [`extract_digest`] | `CIDv1` string → digest hex |
27
28use cow_errors::CowError;
29
// CIDv1 constants (modern encoding)
/// CID version byte written first in every CID this module builds (`CIDv1`).
const CID_VERSION: u8 = 0x01;
/// Multicodec code for `raw` content, used by the modern encoding.
const MULTICODEC_RAW: u8 = 0x55;
/// Multihash function code for `keccak256`, used by the modern encoding.
const HASH_KECCAK256: u8 = 0x1b;
/// Digest length byte written into the CID header.
const HASH_LEN: u8 = 0x20; // 32 bytes

// CIDv1 constants (legacy encoding: dag-pb + sha2-256)
/// Multicodec code for `dag-pb`, used by the legacy encoding.
const MULTICODEC_DAG_PB: u8 = 0x70;
/// Multihash function code for `sha2-256`, used by the legacy encoding.
const HASH_SHA2_256: u8 = 0x12;
39
40/// Convert an `appDataHex` value (the 32-byte `keccak256` stored in the
41/// order struct) into a `CIDv1` string.
42///
43/// The CID is built by hashing the raw bytes of `app_data_hex` with
44/// `keccak256`, then wrapping the digest in a `CIDv1` envelope:
45/// `[version=0x01, codec=0x55 (raw), hash_fn=0x1b (keccak256), len=0x20, ...digest]`.
46/// The result is returned as a multibase base16 string (prefix `f`).
47///
48/// This is the inverse of [`cid_to_appdata_hex`].
49///
50/// Mirrors `appDataHexToCid` from the `@cowprotocol/app-data` `TypeScript`
51/// package.
52///
53/// # Parameters
54///
55/// * `app_data_hex` — the `appData` value, with or without `0x` prefix.
56///
57/// # Returns
58///
59/// A base16 `CIDv1` string prefixed with `f` (e.g.
60/// `f015501201b20...`).
61///
62/// # Errors
63///
64/// Returns [`CowError::AppData`] if `app_data_hex` is not valid hex.
65///
66/// # Example
67///
68/// ```
69/// use cow_app_data::{appdata_hex_to_cid, cid_to_appdata_hex};
70///
71/// let hex = "0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";
72/// let cid = appdata_hex_to_cid(hex).unwrap();
73/// assert!(cid.starts_with('f')); // multibase base16
74/// ```
75pub fn appdata_hex_to_cid(app_data_hex: &str) -> Result<String, CowError> {
76    let hex = app_data_hex.strip_prefix("0x").map_or(app_data_hex, |s| s);
77    let bytes = alloy_primitives::hex::decode(hex)
78        .map_err(|e| CowError::AppData(format!("invalid hex: {e}")))?;
79
80    if bytes.len() != HASH_LEN as usize {
81        return Err(CowError::AppData(format!(
82            "appDataHex must be {} bytes, got {}",
83            HASH_LEN,
84            bytes.len()
85        )));
86    }
87
88    // The appDataHex is already the keccak256 hash of the canonical JSON
89    // document, so it is used verbatim as the CID multihash digest. The
90    // `HASH_KECCAK256` byte in the header declares the hash function that
91    // produced that digest — re-hashing would break round-trips and diverge
92    // from the TypeScript SDK's `appDataHexToCid`.
93    let mut cid = Vec::with_capacity(4 + HASH_LEN as usize);
94    cid.push(CID_VERSION);
95    cid.push(MULTICODEC_RAW);
96    cid.push(HASH_KECCAK256);
97    cid.push(HASH_LEN);
98    cid.extend_from_slice(&bytes);
99
100    // Multibase base16 lowercase: prefix 'f'
101    Ok(format!("f{}", alloy_primitives::hex::encode(&cid)))
102}
103
104/// Extract the digest from a `CIDv1` string and return it as
105/// `0x`-prefixed hex.
106///
107/// This is the inverse of [`appdata_hex_to_cid`]: given a CID stored
108/// alongside an order, recover the 32-byte digest embedded in the CID
109/// header. The returned value can be used as the `appData` field in an
110/// on-chain order struct.
111///
112/// Accepts multibase base16 (`f`/`F`) and base32 lowercase (`b`/`B`); other
113/// multibase encodings return an error.
114///
115/// Mirrors `cidToAppDataHex` from the `@cowprotocol/app-data` `TypeScript`
116/// package.
117///
118/// # Parameters
119///
120/// * `cid` — a multibase CID string (e.g. `"f015501201b20..."` or `"bafkrei..."`).
121///
122/// # Returns
123///
124/// A `0x`-prefixed, lowercase hex string of the 32-byte digest.
125///
126/// # Errors
127///
128/// Returns [`CowError::AppData`] if the multibase prefix is unsupported,
129/// the payload is not valid, or the decoded bytes are shorter than 36
130/// (4-byte header + 32-byte digest).
131///
132/// # Example
133///
134/// ```
135/// use cow_app_data::{appdata_hex_to_cid, cid_to_appdata_hex};
136///
137/// let hex = "0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";
138/// let cid = appdata_hex_to_cid(hex).unwrap();
139/// let recovered = cid_to_appdata_hex(&cid).unwrap();
140/// assert!(recovered.starts_with("0x"));
141/// assert_eq!(recovered.len(), 66); // "0x" + 64 hex chars
142/// ```
143pub fn cid_to_appdata_hex(cid: &str) -> Result<String, CowError> {
144    let bytes = decode_multibase(cid)?;
145
146    // Skip CIDv1 header: version(1) + codec(1) + hash_fn(1) + hash_len(1) = 4 bytes
147    if bytes.len() < 4 + 32 {
148        return Err(CowError::AppData("CID too short".into()));
149    }
150    let digest = &bytes[4..4 + 32];
151    Ok(format!("0x{}", alloy_primitives::hex::encode(digest)))
152}
153
154/// Decode a multibase-prefixed CID string into raw bytes.
155///
156/// Supports the two prefixes emitted by the `multiformats` default base
157/// registry for `CIDv1` payloads we care about:
158///
159/// - `f` / `F` → base16 lowercase (hex)
160/// - `b` / `B` → base32 lowercase, RFC 4648, no padding
161///
162/// Returns [`CowError::AppData`] on unknown prefixes, invalid characters,
163/// or empty input.
164fn decode_multibase(cid: &str) -> Result<Vec<u8>, CowError> {
165    let mut chars = cid.chars();
166    let prefix = chars.next().ok_or_else(|| CowError::AppData("empty CID string".into()))?;
167    let body = chars.as_str();
168
169    match prefix {
170        'f' | 'F' => alloy_primitives::hex::decode(body)
171            .map_err(|e| CowError::AppData(format!("invalid CID hex: {e}"))),
172        'b' | 'B' => decode_base32_lower_nopad(body),
173        other => Err(CowError::AppData(format!(
174            "unsupported CID multibase prefix '{other}' (expected 'f' or 'b')"
175        ))),
176    }
177}
178
179/// RFC 4648 base32 lowercase decoder, no padding (multibase `b`).
180///
181/// Alphabet: `abcdefghijklmnopqrstuvwxyz234567`. Uppercase input is also
182/// accepted (callers should not rely on this — multibase reserves `B` for
183/// the uppercase variant — but matching on a lowercased char is cheaper
184/// than branching twice in [`decode_multibase`]).
185fn decode_base32_lower_nopad(s: &str) -> Result<Vec<u8>, CowError> {
186    let mut out = Vec::with_capacity(s.len() * 5 / 8);
187    let mut buf: u32 = 0;
188    let mut bits: u32 = 0;
189
190    for c in s.chars() {
191        let v: u32 = match c {
192            'a'..='z' => (c as u32) - ('a' as u32),
193            'A'..='Z' => (c as u32) - ('A' as u32),
194            '2'..='7' => (c as u32) - ('2' as u32) + 26,
195            _ => {
196                return Err(CowError::AppData(format!("invalid base32 character '{c}'")));
197            }
198        };
199        buf = (buf << 5) | v;
200        bits += 5;
201        if bits >= 8 {
202            bits -= 8;
203            out.push(((buf >> bits) & 0xff) as u8);
204        }
205    }
206
207    // Trailing bits must be zero (canonical unpadded base32).
208    if bits > 0 && (buf & ((1u32 << bits) - 1)) != 0 {
209        return Err(CowError::AppData("non-canonical base32: trailing bits not zero".into()));
210    }
211
212    Ok(out)
213}
214
215// ── Legacy CID helpers ──────────────────────────────────────────────────────
216
217/// Internal helper: build CID bytes from the given multicodec and hash
218/// algorithm parameters.
219///
220/// This is the Rust equivalent of the `TypeScript` SDK's `_toCidBytes`.
221fn to_cid_bytes(
222    version: u8,
223    multicodec: u8,
224    hashing_algorithm: u8,
225    hashing_length: u8,
226    multihash_hex: &str,
227) -> Result<Vec<u8>, CowError> {
228    let hex = multihash_hex.strip_prefix("0x").map_or(multihash_hex, |s| s);
229    let hash_bytes = alloy_primitives::hex::decode(hex)
230        .map_err(|e| CowError::AppData(format!("invalid hex: {e}")))?;
231
232    let mut cid = Vec::with_capacity(4 + hash_bytes.len());
233    cid.push(version);
234    cid.push(multicodec);
235    cid.push(hashing_algorithm);
236    cid.push(hashing_length);
237    cid.extend_from_slice(&hash_bytes);
238    Ok(cid)
239}
240
241/// Internal helper: convert an `appDataHex` to a `CIDv1` string using the
242/// legacy encoding (`sha2-256` + `dag-pb` multicodec).
243///
244/// **Note**: Legacy CIDs used `CIDv0` (`base58btc`) in the `TypeScript` SDK. This Rust
245/// implementation returns the CID as base16 (prefix `f`) since the crate does not
246/// include a `base58` encoder. Callers requiring `CIDv0` format should convert externally.
247///
248/// This is the Rust equivalent of `_appDataHexToCidLegacy` in the `TypeScript` SDK.
249fn app_data_hex_to_cid_legacy_aux(app_data_hex: &str) -> Result<String, CowError> {
250    let cid_bytes =
251        to_cid_bytes(CID_VERSION, MULTICODEC_DAG_PB, HASH_SHA2_256, HASH_LEN, app_data_hex)?;
252    // Return as base16 since we don't have base58 encoding
253    Ok(format!("f{}", alloy_primitives::hex::encode(&cid_bytes)))
254}
255
256/// Validate that a CID string is non-empty.
257///
258/// A simple guard used after CID derivation to ensure the conversion did
259/// not silently produce an empty string. If `cid` is empty, returns an
260/// error that includes the original `app_data_hex` for debugging.
261///
262/// Mirrors `_assertCid` from the `@cowprotocol/app-data` `TypeScript` package.
263///
264/// # Parameters
265///
266/// * `cid` — the CID string to validate.
267/// * `app_data_hex` — the source hex, included in the error message on failure.
268///
269/// # Errors
270///
271/// Returns [`CowError::AppData`] if `cid` is empty.
272pub fn assert_cid(cid: &str, app_data_hex: &str) -> Result<(), CowError> {
273    if cid.is_empty() {
274        return Err(CowError::AppData(format!("Error getting CID from appDataHex: {app_data_hex}")));
275    }
276    Ok(())
277}
278
279/// Convert an `appDataHex` to a `CIDv1` string using the legacy encoding.
280///
281/// Uses `dag-pb` multicodec with `sha2-256` hashing, matching the original
282/// IPFS CID generation before `CoW` Protocol switched to `keccak256`.
283///
284/// **Note**: The `TypeScript` SDK returns a `CIDv0` (`base58btc`) string. This Rust
285/// implementation returns base16 (prefix `f`) since no `base58` encoder is bundled.
286///
287/// # Errors
288///
289/// Returns [`CowError::AppData`] if `app_data_hex` cannot be decoded.
290#[deprecated(
291    note = "Use appdata_hex_to_cid instead — legacy CID encoding is no longer used by CoW Protocol"
292)]
293pub fn app_data_hex_to_cid_legacy(app_data_hex: &str) -> Result<String, CowError> {
294    let cid = app_data_hex_to_cid_legacy_aux(app_data_hex)?;
295    assert_cid(&cid, app_data_hex)?;
296    Ok(cid)
297}
298
/// Parsed components of an IPFS Content Identifier (CID).
///
/// A CID encodes four header fields followed by the raw hash digest:
///
/// ```text
/// ┌─────────┬───────┬──────────────┬────────────┬──────────────┐
/// │ version │ codec │ hash_function│ hash_length│   digest     │
/// │  (1 B)  │ (1 B) │    (1 B)     │   (1 B)    │ (N bytes)    │
/// └─────────┴───────┴──────────────┴────────────┴──────────────┘
/// ```
///
/// Use [`parse_cid`] to obtain this from a multibase string, or
/// [`decode_cid`] to obtain it from raw bytes. Both store every byte after
/// the four header bytes in `digest`, so `digest.len()` is not guaranteed to
/// equal `hash_length` for malformed input.
///
/// Values compare field-by-field (`PartialEq`/`Eq`) and can be hashed, so
/// parsed CIDs can be checked for equality or used as map keys.
///
/// # Example
///
/// ```
/// use cow_app_data::{appdata_hex_to_cid, parse_cid};
///
/// let hex = "0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";
/// let cid = appdata_hex_to_cid(hex).unwrap();
/// let components = parse_cid(&cid).unwrap();
/// assert_eq!(components.version, 0x01); // CIDv1
/// assert_eq!(components.codec, 0x55); // raw multicodec
/// assert_eq!(components.hash_function, 0x1b); // keccak256
/// assert_eq!(components.hash_length, 0x20); // 32 bytes
/// assert_eq!(components.digest.len(), 32);
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct CidComponents {
    /// CID version (e.g. `1` for `CIDv1`).
    pub version: u8,
    /// Multicodec code (e.g. `0x55` for raw, `0x70` for dag-pb).
    pub codec: u8,
    /// Multihash function code (e.g. `0x1b` for keccak256, `0x12` for sha2-256).
    pub hash_function: u8,
    /// Hash digest length in bytes as declared by the header (typically `32`).
    pub hash_length: u8,
    /// The raw hash digest bytes (everything after the 4-byte header).
    pub digest: Vec<u8>,
}
340
341/// Parse a CID string into its constituent [`CidComponents`].
342///
343/// Decodes the multibase prefix, strips it, hex-decodes the remainder, and
344/// splits the resulting bytes into the four header fields plus the digest.
345///
346/// Supports multibase base16 (`f`/`F`) and base32 lowercase (`b`/`B`, RFC
347/// 4648 unpadded). Other multibase encodings (e.g. `base58btc` starting
348/// with `Qm`) return an error.
349///
350/// Mirrors `parseCid` from the `@cowprotocol/app-data` `TypeScript` package.
351///
352/// # Parameters
353///
354/// * `ipfs_hash` — a multibase-encoded CID string (e.g. `"f015501201b20..."` or `"bafkrei..."`).
355///
356/// # Returns
357///
358/// A [`CidComponents`] struct with the parsed version, codec, hash function,
359/// hash length, and raw digest bytes.
360///
361/// # Errors
362///
363/// Returns [`CowError::AppData`] if the multibase prefix is unsupported,
364/// the body is malformed, or the decoded payload is shorter than 4 bytes.
365///
366/// # Example
367///
368/// ```
369/// use cow_app_data::{appdata_hex_to_cid, parse_cid};
370///
371/// let hex = "0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";
372/// let cid = appdata_hex_to_cid(hex).unwrap();
373/// let c = parse_cid(&cid).unwrap();
374/// assert_eq!(c.version, 1);
375/// assert_eq!(c.digest.len(), 32);
376/// ```
377pub fn parse_cid(ipfs_hash: &str) -> Result<CidComponents, CowError> {
378    let bytes = decode_multibase(ipfs_hash)?;
379
380    if bytes.len() < 4 {
381        return Err(CowError::AppData("CID too short".into()));
382    }
383
384    let version = bytes[0];
385    let codec = bytes[1];
386    let hash_function = bytes[2];
387    let hash_length = bytes[3];
388    let digest = bytes[4..].to_vec();
389
390    Ok(CidComponents { version, codec, hash_function, hash_length, digest })
391}
392
393/// Decode raw CID bytes into their constituent [`CidComponents`].
394///
395/// Unlike [`parse_cid`], this function operates on raw bytes rather than a
396/// multibase-encoded string. Use it when you already have the CID as a byte
397/// slice (e.g. from a binary protocol or a database column).
398///
399/// Mirrors `decodeCid` from the `@cowprotocol/app-data` `TypeScript` package.
400///
401/// # Parameters
402///
403/// * `bytes` — raw CID bytes: `[version, codec, hash_fn, hash_len, ...digest]`.
404///
405/// # Returns
406///
407/// A [`CidComponents`] struct with the parsed fields.
408///
409/// # Errors
410///
411/// Returns [`CowError::AppData`] if the byte slice is shorter than 4 bytes
412/// (the minimum CID header size).
413///
414/// # Example
415///
416/// ```
417/// use cow_app_data::decode_cid;
418///
419/// let mut bytes = vec![0x01, 0x55, 0x1b, 0x20];
420/// bytes.extend_from_slice(&[0u8; 32]); // 32 digest bytes
421/// let c = decode_cid(&bytes).unwrap();
422/// assert_eq!(c.version, 1);
423/// assert_eq!(c.codec, 0x55);
424/// assert_eq!(c.digest.len(), 32);
425/// ```
426pub fn decode_cid(bytes: &[u8]) -> Result<CidComponents, CowError> {
427    if bytes.len() < 4 {
428        return Err(CowError::AppData("CID bytes too short".into()));
429    }
430
431    Ok(CidComponents {
432        version: bytes[0],
433        codec: bytes[1],
434        hash_function: bytes[2],
435        hash_length: bytes[3],
436        digest: bytes[4..].to_vec(),
437    })
438}
439
440/// Extract the multihash digest from a CID string and return it as
441/// `0x`-prefixed hex.
442///
443/// Parses the CID via [`parse_cid`], then returns only the raw digest
444/// portion as a `0x`-prefixed hex string. This is useful when you have a
445/// CID from IPFS and need to recover the hash digest to match against
446/// on-chain `appData` values.
447///
448/// Note: the digest extracted here is the hash **inside** the CID, not the
449/// original `appDataHex`. For round-trip conversion use [`cid_to_appdata_hex`].
450///
451/// Mirrors `extractDigest` from the `@cowprotocol/app-data` `TypeScript`
452/// package.
453///
454/// # Parameters
455///
456/// * `cid` — a base16 multibase CID string.
457///
458/// # Returns
459///
460/// A `0x`-prefixed hex string of the raw digest bytes.
461///
462/// # Errors
463///
464/// Returns [`CowError::AppData`] if the CID cannot be parsed.
465///
466/// # Example
467///
468/// ```
469/// use cow_app_data::{appdata_hex_to_cid, extract_digest};
470///
471/// let hex = "0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";
472/// let cid = appdata_hex_to_cid(hex).unwrap();
473/// let digest = extract_digest(&cid).unwrap();
474/// assert!(digest.starts_with("0x"));
475/// assert_eq!(digest.len(), 66); // "0x" + 64 hex chars
476/// ```
477pub fn extract_digest(cid: &str) -> Result<String, CowError> {
478    let components = parse_cid(cid)?;
479    Ok(format!("0x{}", alloy_primitives::hex::encode(&components.digest)))
480}
481
// Unit tests for the CID helpers. Results are unwrapped with `expect` rather
// than `unwrap_or_default()` so that a failing conversion surfaces as a test
// failure instead of being compared as an empty string.
#[cfg(test)]
mod tests {
    use super::*;

    const SAMPLE_HEX: &str = "0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";

    #[test]
    fn appdata_hex_to_cid_produces_base16_cid() {
        let cid = appdata_hex_to_cid(SAMPLE_HEX).expect("SAMPLE_HEX is a valid 32-byte digest");
        assert!(cid.starts_with('f'));
        // CID header (4 bytes) + digest (32 bytes) = 36 bytes → 72 hex chars + 'f' prefix
        assert_eq!(cid.len(), 1 + 72);
    }

    #[test]
    fn appdata_hex_to_cid_without_0x_prefix() {
        let hex = SAMPLE_HEX.strip_prefix("0x").unwrap_or(SAMPLE_HEX);
        let cid = appdata_hex_to_cid(hex).expect("un-prefixed hex is accepted");
        assert!(cid.starts_with('f'));
    }

    #[test]
    fn cid_to_appdata_hex_roundtrip() {
        let cid = appdata_hex_to_cid(SAMPLE_HEX).unwrap();
        let recovered = cid_to_appdata_hex(&cid).unwrap();
        assert!(recovered.starts_with("0x"));
        assert_eq!(recovered.len(), 66);
        assert_eq!(recovered, SAMPLE_HEX);
    }

    #[test]
    fn appdata_hex_to_cid_uses_input_as_digest() {
        // The appDataHex is already a keccak256; it must become the CID digest
        // verbatim (no extra hashing), matching the TypeScript SDK.
        let cid = appdata_hex_to_cid(SAMPLE_HEX).unwrap();
        let components = parse_cid(&cid).unwrap();
        let expected = alloy_primitives::hex::decode(SAMPLE_HEX.trim_start_matches("0x")).unwrap();
        assert_eq!(components.digest, expected);
    }

    #[test]
    fn appdata_hex_to_cid_rejects_wrong_length() {
        assert!(appdata_hex_to_cid("0xdeadbeef").is_err());
    }

    #[test]
    fn cid_to_appdata_hex_rejects_unsupported_multibase() {
        // base58btc (prefix 'Q') is not supported.
        assert!(cid_to_appdata_hex("Qmabc123").is_err());
        // 'z' multibase is not supported.
        assert!(cid_to_appdata_hex("zabc123").is_err());
    }

    #[test]
    fn cid_to_appdata_hex_rejects_empty() {
        assert!(cid_to_appdata_hex("").is_err());
    }

    #[test]
    fn cid_to_appdata_hex_decodes_uppercase_base32() {
        // Multibase 'B' (uppercase base32) must decode identically to 'b'.
        let cid = "BAFKREIEAQHRRDSFTXWXASEAYMNEMP7F7V6PY6PSBH7NQCNPM5RQHMESVXI";
        let expected = "0x8081e311c8b3bdae0910186348c7fcbfaf9f8f3e413fdb0135ecec60761255ba";
        assert_eq!(cid_to_appdata_hex(cid).unwrap(), expected);
    }

    #[test]
    fn cid_to_appdata_hex_rejects_invalid_base32_char() {
        // '1' is not in the RFC 4648 base32 alphabet.
        let err = cid_to_appdata_hex("b1xyz").unwrap_err();
        assert!(format!("{err}").contains("invalid base32 character"));
    }

    #[test]
    fn cid_to_appdata_hex_rejects_non_canonical_base32() {
        // "az" yields 1 decoded byte with 2 trailing bits equal to 0b01;
        // the decoder must reject this as non-canonical rather than silently
        // truncating.
        let err = cid_to_appdata_hex("baz").unwrap_err();
        assert!(format!("{err}").contains("non-canonical base32"));
    }

    #[test]
    fn cid_to_appdata_hex_decodes_base32_vector() {
        // Parity vector produced by @cowprotocol/app-data's `cidToAppDataHex`.
        // Multibase base32 lowercase (prefix 'b'), RFC 4648 unpadded — the
        // default encoding emitted by `multiformats`' `CID.toString()` for
        // CIDv1 when no explicit base is chosen.
        let cid = "bafkreieaqhrrdsftxwxaseaymnemp7f7v6py6psbh7nqcnpm5rqhmesvxi";
        let expected = "0x8081e311c8b3bdae0910186348c7fcbfaf9f8f3e413fdb0135ecec60761255ba";
        assert_eq!(cid_to_appdata_hex(cid).unwrap(), expected);
    }

    #[test]
    fn cid_to_appdata_hex_rejects_too_short() {
        assert!(cid_to_appdata_hex("f0155").is_err());
    }

    #[test]
    fn parse_cid_components() {
        let cid = appdata_hex_to_cid(SAMPLE_HEX).expect("SAMPLE_HEX is a valid 32-byte digest");
        let c = parse_cid(&cid).expect("round-tripped CID is valid");
        assert_eq!(c.version, CID_VERSION);
        assert_eq!(c.codec, MULTICODEC_RAW);
        assert_eq!(c.hash_function, HASH_KECCAK256);
        assert_eq!(c.hash_length, HASH_LEN);
        assert_eq!(c.digest.len(), 32);
    }

    #[test]
    fn parse_cid_rejects_unsupported_multibase() {
        // '_' is not a multibase prefix we support.
        assert!(parse_cid("_not_a_cid").is_err());
        // base58btc CIDv0 (leading 'Q') is rejected.
        assert!(parse_cid("QmSomething").is_err());
    }

    #[test]
    fn parse_cid_base32_components() {
        // This vector comes from the TypeScript SDK parity suite. It is a
        // `raw` CIDv1 but hashed with sha2-256 (0x12), not keccak, so we
        // assert the actual header the decoder must produce rather than the
        // keccak-specific constants used elsewhere in the module.
        let cid = "bafkreieaqhrrdsftxwxaseaymnemp7f7v6py6psbh7nqcnpm5rqhmesvxi";
        let c = parse_cid(cid).unwrap();
        assert_eq!(c.version, CID_VERSION);
        assert_eq!(c.codec, MULTICODEC_RAW);
        assert_eq!(c.hash_function, HASH_SHA2_256);
        assert_eq!(c.hash_length, HASH_LEN);
        assert_eq!(c.digest.len(), 32);
    }

    #[test]
    fn parse_cid_rejects_too_short() {
        assert!(parse_cid("f01").is_err());
    }

    #[test]
    fn decode_cid_from_bytes() {
        let mut bytes = vec![0x01, 0x55, 0x1b, 0x20];
        bytes.extend_from_slice(&[0xaa; 32]);
        let c = decode_cid(&bytes).expect("hand-crafted CID bytes are valid");
        assert_eq!(c.version, 1);
        assert_eq!(c.codec, 0x55);
        assert_eq!(c.digest.len(), 32);
    }

    #[test]
    fn decode_cid_rejects_short_bytes() {
        assert!(decode_cid(&[0x01, 0x02, 0x03]).is_err());
        assert!(decode_cid(&[]).is_err());
    }

    #[test]
    fn extract_digest_returns_0x_prefixed() {
        let cid = appdata_hex_to_cid(SAMPLE_HEX).expect("SAMPLE_HEX is a valid 32-byte digest");
        let digest = extract_digest(&cid).expect("round-tripped CID is valid");
        assert!(digest.starts_with("0x"));
        assert_eq!(digest.len(), 66);
        // The digest inside the CID is the original appDataHex.
        assert_eq!(digest, SAMPLE_HEX);
    }

    #[test]
    fn assert_cid_accepts_nonempty() {
        assert!(assert_cid("f01234", "0xabc").is_ok());
    }

    #[test]
    fn assert_cid_rejects_empty() {
        assert!(assert_cid("", "0xabc").is_err());
    }

    #[test]
    #[allow(deprecated, reason = "testing legacy API surface")]
    fn legacy_cid_produces_base16_string() {
        let cid = app_data_hex_to_cid_legacy(SAMPLE_HEX).expect("SAMPLE_HEX is valid hex");
        assert!(cid.starts_with('f'));
        // Header bytes: version 0x01, dag-pb 0x70, sha2-256 0x12, length 0x20.
        assert!(cid.starts_with("f01701220"));
    }

    #[test]
    fn appdata_hex_to_cid_invalid_hex() {
        assert!(appdata_hex_to_cid("0xZZZZ").is_err());
    }

    #[test]
    fn deterministic_output() {
        // Unwrap both results: comparing two defaulted (empty) strings would
        // pass even if the conversion failed every time.
        let cid1 = appdata_hex_to_cid(SAMPLE_HEX).expect("SAMPLE_HEX is a valid 32-byte digest");
        let cid2 = appdata_hex_to_cid(SAMPLE_HEX).expect("SAMPLE_HEX is a valid 32-byte digest");
        assert_eq!(cid1, cid2);
    }

    #[test]
    fn cid_to_appdata_hex_invalid_hex() {
        assert!(cid_to_appdata_hex("fZZZZinvalid").is_err());
    }

    #[test]
    fn parse_cid_uppercase_f_prefix() {
        let cid = appdata_hex_to_cid(SAMPLE_HEX).unwrap();
        // Replace lowercase 'f' prefix with uppercase 'F'
        let upper = format!("F{}", &cid[1..]);
        let c = parse_cid(&upper).unwrap();
        assert_eq!(c.version, CID_VERSION);
    }

    #[test]
    fn to_cid_bytes_without_0x() {
        let hex = SAMPLE_HEX.strip_prefix("0x").unwrap();
        let bytes = to_cid_bytes(CID_VERSION, MULTICODEC_RAW, HASH_KECCAK256, HASH_LEN, hex)
            .expect("un-prefixed hex is accepted");
        assert_eq!(bytes.len(), 4 + 32);
        assert_eq!(&bytes[..4], &[CID_VERSION, MULTICODEC_RAW, HASH_KECCAK256, HASH_LEN]);
    }

    #[test]
    fn to_cid_bytes_invalid_hex() {
        let result = to_cid_bytes(CID_VERSION, MULTICODEC_RAW, HASH_KECCAK256, HASH_LEN, "ZZZZ");
        assert!(result.is_err());
    }
}