cow_app_data/cid.rs
1//! IPFS `CIDv1` conversion helpers for `CoW` Protocol app-data.
2//!
3//! Every `CoW` Protocol order's `appData` hash can be mapped to an IPFS
4//! Content Identifier (CID) so that the full JSON document is retrievable
5//! from any IPFS gateway. This module handles the bidirectional conversion
6//! between the 32-byte `appDataHex` stored on-chain and the `CIDv1` string
7//! used by IPFS.
8//!
9//! The modern encoding uses `keccak256` with the `raw` multicodec (`0x55`).
10//! Produced CIDs use multibase base16 lowercase (prefix `f`). Parsing also
11//! accepts multibase base32 lowercase (prefix `b`, RFC 4648 unpadded), which
12//! is the default multibase used by `multiformats`' `CID.parse` when no
13//! explicit decoder is provided — so strings like
14//! `bafkrei...` produced by the `TypeScript` SDK round-trip correctly.
15//! Legacy helpers using `dag-pb` / `sha2-256` are preserved for backwards
16//! compatibility but are deprecated.
17//!
18//! # Key functions
19//!
20//! | Function | Direction |
21//! |---|---|
22//! | [`appdata_hex_to_cid`] | `appDataHex` → `CIDv1` string |
23//! | [`cid_to_appdata_hex`] | `CIDv1` string → `appDataHex` |
24//! | [`parse_cid`] | `CIDv1` string → [`CidComponents`] |
25//! | [`decode_cid`] | raw CID bytes → [`CidComponents`] |
26//! | [`extract_digest`] | `CIDv1` string → digest hex |
27
28use cow_errors::CowError;
29
// CIDv1 constants (modern encoding)
/// CID version byte (`0x01`, i.e. `CIDv1`); first byte of every header built here.
const CID_VERSION: u8 = 0x01;
/// Multicodec code for `raw` content.
const MULTICODEC_RAW: u8 = 0x55;
/// Multihash function code for `keccak256`.
const HASH_KECCAK256: u8 = 0x1b;
/// Digest length byte written into the CID header.
const HASH_LEN: u8 = 0x20; // 32 bytes

// CIDv1 constants (legacy encoding: dag-pb + sha2-256)
/// Multicodec code for `dag-pb`.
const MULTICODEC_DAG_PB: u8 = 0x70;
/// Multihash function code for `sha2-256`.
const HASH_SHA2_256: u8 = 0x12;
39
40/// Convert an `appDataHex` value (the 32-byte `keccak256` stored in the
41/// order struct) into a `CIDv1` string.
42///
43/// The CID is built by hashing the raw bytes of `app_data_hex` with
44/// `keccak256`, then wrapping the digest in a `CIDv1` envelope:
45/// `[version=0x01, codec=0x55 (raw), hash_fn=0x1b (keccak256), len=0x20, ...digest]`.
46/// The result is returned as a multibase base16 string (prefix `f`).
47///
48/// This is the inverse of [`cid_to_appdata_hex`].
49///
50/// Mirrors `appDataHexToCid` from the `@cowprotocol/app-data` `TypeScript`
51/// package.
52///
53/// # Parameters
54///
55/// * `app_data_hex` — the `appData` value, with or without `0x` prefix.
56///
57/// # Returns
58///
59/// A base16 `CIDv1` string prefixed with `f` (e.g.
60/// `f015501201b20...`).
61///
62/// # Errors
63///
64/// Returns [`CowError::AppData`] if `app_data_hex` is not valid hex.
65///
66/// # Example
67///
68/// ```
69/// use cow_app_data::{appdata_hex_to_cid, cid_to_appdata_hex};
70///
71/// let hex = "0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";
72/// let cid = appdata_hex_to_cid(hex).unwrap();
73/// assert!(cid.starts_with('f')); // multibase base16
74/// ```
75pub fn appdata_hex_to_cid(app_data_hex: &str) -> Result<String, CowError> {
76 let hex = app_data_hex.strip_prefix("0x").map_or(app_data_hex, |s| s);
77 let bytes = alloy_primitives::hex::decode(hex)
78 .map_err(|e| CowError::AppData(format!("invalid hex: {e}")))?;
79
80 if bytes.len() != HASH_LEN as usize {
81 return Err(CowError::AppData(format!(
82 "appDataHex must be {} bytes, got {}",
83 HASH_LEN,
84 bytes.len()
85 )));
86 }
87
88 // The appDataHex is already the keccak256 hash of the canonical JSON
89 // document, so it is used verbatim as the CID multihash digest. The
90 // `HASH_KECCAK256` byte in the header declares the hash function that
91 // produced that digest — re-hashing would break round-trips and diverge
92 // from the TypeScript SDK's `appDataHexToCid`.
93 let mut cid = Vec::with_capacity(4 + HASH_LEN as usize);
94 cid.push(CID_VERSION);
95 cid.push(MULTICODEC_RAW);
96 cid.push(HASH_KECCAK256);
97 cid.push(HASH_LEN);
98 cid.extend_from_slice(&bytes);
99
100 // Multibase base16 lowercase: prefix 'f'
101 Ok(format!("f{}", alloy_primitives::hex::encode(&cid)))
102}
103
104/// Extract the digest from a `CIDv1` string and return it as
105/// `0x`-prefixed hex.
106///
107/// This is the inverse of [`appdata_hex_to_cid`]: given a CID stored
108/// alongside an order, recover the 32-byte digest embedded in the CID
109/// header. The returned value can be used as the `appData` field in an
110/// on-chain order struct.
111///
112/// Accepts multibase base16 (`f`/`F`) and base32 lowercase (`b`/`B`); other
113/// multibase encodings return an error.
114///
115/// Mirrors `cidToAppDataHex` from the `@cowprotocol/app-data` `TypeScript`
116/// package.
117///
118/// # Parameters
119///
120/// * `cid` — a multibase CID string (e.g. `"f015501201b20..."` or `"bafkrei..."`).
121///
122/// # Returns
123///
124/// A `0x`-prefixed, lowercase hex string of the 32-byte digest.
125///
126/// # Errors
127///
128/// Returns [`CowError::AppData`] if the multibase prefix is unsupported,
129/// the payload is not valid, or the decoded bytes are shorter than 36
130/// (4-byte header + 32-byte digest).
131///
132/// # Example
133///
134/// ```
135/// use cow_app_data::{appdata_hex_to_cid, cid_to_appdata_hex};
136///
137/// let hex = "0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";
138/// let cid = appdata_hex_to_cid(hex).unwrap();
139/// let recovered = cid_to_appdata_hex(&cid).unwrap();
140/// assert!(recovered.starts_with("0x"));
141/// assert_eq!(recovered.len(), 66); // "0x" + 64 hex chars
142/// ```
143pub fn cid_to_appdata_hex(cid: &str) -> Result<String, CowError> {
144 let bytes = decode_multibase(cid)?;
145
146 // Skip CIDv1 header: version(1) + codec(1) + hash_fn(1) + hash_len(1) = 4 bytes
147 if bytes.len() < 4 + 32 {
148 return Err(CowError::AppData("CID too short".into()));
149 }
150 let digest = &bytes[4..4 + 32];
151 Ok(format!("0x{}", alloy_primitives::hex::encode(digest)))
152}
153
154/// Decode a multibase-prefixed CID string into raw bytes.
155///
156/// Supports the two prefixes emitted by the `multiformats` default base
157/// registry for `CIDv1` payloads we care about:
158///
159/// - `f` / `F` → base16 lowercase (hex)
160/// - `b` / `B` → base32 lowercase, RFC 4648, no padding
161///
162/// Returns [`CowError::AppData`] on unknown prefixes, invalid characters,
163/// or empty input.
164fn decode_multibase(cid: &str) -> Result<Vec<u8>, CowError> {
165 let mut chars = cid.chars();
166 let prefix = chars.next().ok_or_else(|| CowError::AppData("empty CID string".into()))?;
167 let body = chars.as_str();
168
169 match prefix {
170 'f' | 'F' => alloy_primitives::hex::decode(body)
171 .map_err(|e| CowError::AppData(format!("invalid CID hex: {e}"))),
172 'b' | 'B' => decode_base32_lower_nopad(body),
173 other => Err(CowError::AppData(format!(
174 "unsupported CID multibase prefix '{other}' (expected 'f' or 'b')"
175 ))),
176 }
177}
178
179/// RFC 4648 base32 lowercase decoder, no padding (multibase `b`).
180///
181/// Alphabet: `abcdefghijklmnopqrstuvwxyz234567`. Uppercase input is also
182/// accepted (callers should not rely on this — multibase reserves `B` for
183/// the uppercase variant — but matching on a lowercased char is cheaper
184/// than branching twice in [`decode_multibase`]).
185fn decode_base32_lower_nopad(s: &str) -> Result<Vec<u8>, CowError> {
186 let mut out = Vec::with_capacity(s.len() * 5 / 8);
187 let mut buf: u32 = 0;
188 let mut bits: u32 = 0;
189
190 for c in s.chars() {
191 let v: u32 = match c {
192 'a'..='z' => (c as u32) - ('a' as u32),
193 'A'..='Z' => (c as u32) - ('A' as u32),
194 '2'..='7' => (c as u32) - ('2' as u32) + 26,
195 _ => {
196 return Err(CowError::AppData(format!("invalid base32 character '{c}'")));
197 }
198 };
199 buf = (buf << 5) | v;
200 bits += 5;
201 if bits >= 8 {
202 bits -= 8;
203 out.push(((buf >> bits) & 0xff) as u8);
204 }
205 }
206
207 // Trailing bits must be zero (canonical unpadded base32).
208 if bits > 0 && (buf & ((1u32 << bits) - 1)) != 0 {
209 return Err(CowError::AppData("non-canonical base32: trailing bits not zero".into()));
210 }
211
212 Ok(out)
213}
214
215// ── Legacy CID helpers ──────────────────────────────────────────────────────
216
217/// Internal helper: build CID bytes from the given multicodec and hash
218/// algorithm parameters.
219///
220/// This is the Rust equivalent of the `TypeScript` SDK's `_toCidBytes`.
221fn to_cid_bytes(
222 version: u8,
223 multicodec: u8,
224 hashing_algorithm: u8,
225 hashing_length: u8,
226 multihash_hex: &str,
227) -> Result<Vec<u8>, CowError> {
228 let hex = multihash_hex.strip_prefix("0x").map_or(multihash_hex, |s| s);
229 let hash_bytes = alloy_primitives::hex::decode(hex)
230 .map_err(|e| CowError::AppData(format!("invalid hex: {e}")))?;
231
232 let mut cid = Vec::with_capacity(4 + hash_bytes.len());
233 cid.push(version);
234 cid.push(multicodec);
235 cid.push(hashing_algorithm);
236 cid.push(hashing_length);
237 cid.extend_from_slice(&hash_bytes);
238 Ok(cid)
239}
240
241/// Internal helper: convert an `appDataHex` to a `CIDv1` string using the
242/// legacy encoding (`sha2-256` + `dag-pb` multicodec).
243///
244/// **Note**: Legacy CIDs used `CIDv0` (`base58btc`) in the `TypeScript` SDK. This Rust
245/// implementation returns the CID as base16 (prefix `f`) since the crate does not
246/// include a `base58` encoder. Callers requiring `CIDv0` format should convert externally.
247///
248/// This is the Rust equivalent of `_appDataHexToCidLegacy` in the `TypeScript` SDK.
249fn app_data_hex_to_cid_legacy_aux(app_data_hex: &str) -> Result<String, CowError> {
250 let cid_bytes =
251 to_cid_bytes(CID_VERSION, MULTICODEC_DAG_PB, HASH_SHA2_256, HASH_LEN, app_data_hex)?;
252 // Return as base16 since we don't have base58 encoding
253 Ok(format!("f{}", alloy_primitives::hex::encode(&cid_bytes)))
254}
255
256/// Validate that a CID string is non-empty.
257///
258/// A simple guard used after CID derivation to ensure the conversion did
259/// not silently produce an empty string. If `cid` is empty, returns an
260/// error that includes the original `app_data_hex` for debugging.
261///
262/// Mirrors `_assertCid` from the `@cowprotocol/app-data` `TypeScript` package.
263///
264/// # Parameters
265///
266/// * `cid` — the CID string to validate.
267/// * `app_data_hex` — the source hex, included in the error message on failure.
268///
269/// # Errors
270///
271/// Returns [`CowError::AppData`] if `cid` is empty.
272pub fn assert_cid(cid: &str, app_data_hex: &str) -> Result<(), CowError> {
273 if cid.is_empty() {
274 return Err(CowError::AppData(format!("Error getting CID from appDataHex: {app_data_hex}")));
275 }
276 Ok(())
277}
278
279/// Convert an `appDataHex` to a `CIDv1` string using the legacy encoding.
280///
281/// Uses `dag-pb` multicodec with `sha2-256` hashing, matching the original
282/// IPFS CID generation before `CoW` Protocol switched to `keccak256`.
283///
284/// **Note**: The `TypeScript` SDK returns a `CIDv0` (`base58btc`) string. This Rust
285/// implementation returns base16 (prefix `f`) since no `base58` encoder is bundled.
286///
287/// # Errors
288///
289/// Returns [`CowError::AppData`] if `app_data_hex` cannot be decoded.
290#[deprecated(
291 note = "Use appdata_hex_to_cid instead — legacy CID encoding is no longer used by CoW Protocol"
292)]
293pub fn app_data_hex_to_cid_legacy(app_data_hex: &str) -> Result<String, CowError> {
294 let cid = app_data_hex_to_cid_legacy_aux(app_data_hex)?;
295 assert_cid(&cid, app_data_hex)?;
296 Ok(cid)
297}
298
/// Parsed components of an IPFS Content Identifier (CID).
///
/// A CID encodes four header fields followed by the raw hash digest:
///
/// ```text
/// ┌─────────┬───────┬──────────────┬────────────┬──────────────┐
/// │ version │ codec │ hash_function│ hash_length│ digest       │
/// │ (1 B)   │ (1 B) │ (1 B)        │ (1 B)      │ (N bytes)    │
/// └─────────┴───────┴──────────────┴────────────┴──────────────┘
/// ```
///
/// Use [`parse_cid`] to obtain this from a multibase string, or
/// [`decode_cid`] to obtain it from raw bytes.
///
/// Two values compare equal when all five fields are equal, so parsed CIDs
/// can be checked with `==` / `assert_eq!`.
///
/// # Example
///
/// ```
/// use cow_app_data::{appdata_hex_to_cid, parse_cid};
///
/// let hex = "0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";
/// let cid = appdata_hex_to_cid(hex).unwrap();
/// let components = parse_cid(&cid).unwrap();
/// assert_eq!(components.version, 0x01); // CIDv1
/// assert_eq!(components.codec, 0x55); // raw multicodec
/// assert_eq!(components.hash_function, 0x1b); // keccak256
/// assert_eq!(components.hash_length, 0x20); // 32 bytes
/// assert_eq!(components.digest.len(), 32);
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CidComponents {
    /// CID version (e.g. `1` for `CIDv1`).
    pub version: u8,
    /// Multicodec code (e.g. `0x55` for raw, `0x70` for dag-pb).
    pub codec: u8,
    /// Multihash function code (e.g. `0x1b` for keccak256, `0x12` for sha2-256).
    pub hash_function: u8,
    /// Hash digest length in bytes as declared by the header (typically `32`).
    pub hash_length: u8,
    /// The raw hash digest bytes: everything that follows the four header bytes.
    pub digest: Vec<u8>,
}
340
341/// Parse a CID string into its constituent [`CidComponents`].
342///
343/// Decodes the multibase prefix, strips it, hex-decodes the remainder, and
344/// splits the resulting bytes into the four header fields plus the digest.
345///
346/// Supports multibase base16 (`f`/`F`) and base32 lowercase (`b`/`B`, RFC
347/// 4648 unpadded). Other multibase encodings (e.g. `base58btc` starting
348/// with `Qm`) return an error.
349///
350/// Mirrors `parseCid` from the `@cowprotocol/app-data` `TypeScript` package.
351///
352/// # Parameters
353///
354/// * `ipfs_hash` — a multibase-encoded CID string (e.g. `"f015501201b20..."` or `"bafkrei..."`).
355///
356/// # Returns
357///
358/// A [`CidComponents`] struct with the parsed version, codec, hash function,
359/// hash length, and raw digest bytes.
360///
361/// # Errors
362///
363/// Returns [`CowError::AppData`] if the multibase prefix is unsupported,
364/// the body is malformed, or the decoded payload is shorter than 4 bytes.
365///
366/// # Example
367///
368/// ```
369/// use cow_app_data::{appdata_hex_to_cid, parse_cid};
370///
371/// let hex = "0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";
372/// let cid = appdata_hex_to_cid(hex).unwrap();
373/// let c = parse_cid(&cid).unwrap();
374/// assert_eq!(c.version, 1);
375/// assert_eq!(c.digest.len(), 32);
376/// ```
377pub fn parse_cid(ipfs_hash: &str) -> Result<CidComponents, CowError> {
378 let bytes = decode_multibase(ipfs_hash)?;
379
380 if bytes.len() < 4 {
381 return Err(CowError::AppData("CID too short".into()));
382 }
383
384 let version = bytes[0];
385 let codec = bytes[1];
386 let hash_function = bytes[2];
387 let hash_length = bytes[3];
388 let digest = bytes[4..].to_vec();
389
390 Ok(CidComponents { version, codec, hash_function, hash_length, digest })
391}
392
393/// Decode raw CID bytes into their constituent [`CidComponents`].
394///
395/// Unlike [`parse_cid`], this function operates on raw bytes rather than a
396/// multibase-encoded string. Use it when you already have the CID as a byte
397/// slice (e.g. from a binary protocol or a database column).
398///
399/// Mirrors `decodeCid` from the `@cowprotocol/app-data` `TypeScript` package.
400///
401/// # Parameters
402///
403/// * `bytes` — raw CID bytes: `[version, codec, hash_fn, hash_len, ...digest]`.
404///
405/// # Returns
406///
407/// A [`CidComponents`] struct with the parsed fields.
408///
409/// # Errors
410///
411/// Returns [`CowError::AppData`] if the byte slice is shorter than 4 bytes
412/// (the minimum CID header size).
413///
414/// # Example
415///
416/// ```
417/// use cow_app_data::decode_cid;
418///
419/// let mut bytes = vec![0x01, 0x55, 0x1b, 0x20];
420/// bytes.extend_from_slice(&[0u8; 32]); // 32 digest bytes
421/// let c = decode_cid(&bytes).unwrap();
422/// assert_eq!(c.version, 1);
423/// assert_eq!(c.codec, 0x55);
424/// assert_eq!(c.digest.len(), 32);
425/// ```
426pub fn decode_cid(bytes: &[u8]) -> Result<CidComponents, CowError> {
427 if bytes.len() < 4 {
428 return Err(CowError::AppData("CID bytes too short".into()));
429 }
430
431 Ok(CidComponents {
432 version: bytes[0],
433 codec: bytes[1],
434 hash_function: bytes[2],
435 hash_length: bytes[3],
436 digest: bytes[4..].to_vec(),
437 })
438}
439
440/// Extract the multihash digest from a CID string and return it as
441/// `0x`-prefixed hex.
442///
443/// Parses the CID via [`parse_cid`], then returns only the raw digest
444/// portion as a `0x`-prefixed hex string. This is useful when you have a
445/// CID from IPFS and need to recover the hash digest to match against
446/// on-chain `appData` values.
447///
448/// Note: the digest extracted here is the hash **inside** the CID, not the
449/// original `appDataHex`. For round-trip conversion use [`cid_to_appdata_hex`].
450///
451/// Mirrors `extractDigest` from the `@cowprotocol/app-data` `TypeScript`
452/// package.
453///
454/// # Parameters
455///
456/// * `cid` — a base16 multibase CID string.
457///
458/// # Returns
459///
460/// A `0x`-prefixed hex string of the raw digest bytes.
461///
462/// # Errors
463///
464/// Returns [`CowError::AppData`] if the CID cannot be parsed.
465///
466/// # Example
467///
468/// ```
469/// use cow_app_data::{appdata_hex_to_cid, extract_digest};
470///
471/// let hex = "0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";
472/// let cid = appdata_hex_to_cid(hex).unwrap();
473/// let digest = extract_digest(&cid).unwrap();
474/// assert!(digest.starts_with("0x"));
475/// assert_eq!(digest.len(), 66); // "0x" + 64 hex chars
476/// ```
477pub fn extract_digest(cid: &str) -> Result<String, CowError> {
478 let components = parse_cid(cid)?;
479 Ok(format!("0x{}", alloy_primitives::hex::encode(&components.digest)))
480}
481
/// Unit tests for the CID conversion helpers.
///
/// Fallible setup steps use `expect`/`unwrap` rather than
/// `unwrap_or_default()`: substituting an empty string for a failed
/// conversion can make an assertion pass for the wrong reason (two failures
/// compare equal), so a broken conversion must abort the test instead.
#[cfg(test)]
mod tests {
    use super::*;

    const SAMPLE_HEX: &str = "0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";

    #[test]
    fn appdata_hex_to_cid_produces_base16_cid() {
        let cid = appdata_hex_to_cid(SAMPLE_HEX).expect("SAMPLE_HEX is a valid 32-byte digest");
        assert!(cid.starts_with('f'));
        // CID header (4 bytes) + digest (32 bytes) = 36 bytes → 72 hex chars + 'f' prefix
        assert_eq!(cid.len(), 1 + 72);
    }

    #[test]
    fn appdata_hex_to_cid_without_0x_prefix() {
        let hex = SAMPLE_HEX.strip_prefix("0x").unwrap_or(SAMPLE_HEX);
        let cid = appdata_hex_to_cid(hex).expect("unprefixed SAMPLE_HEX is a valid digest");
        assert!(cid.starts_with('f'));
    }

    #[test]
    fn cid_to_appdata_hex_roundtrip() {
        let cid = appdata_hex_to_cid(SAMPLE_HEX).unwrap();
        let recovered = cid_to_appdata_hex(&cid).unwrap();
        assert!(recovered.starts_with("0x"));
        assert_eq!(recovered.len(), 66);
        assert_eq!(recovered, SAMPLE_HEX);
    }

    #[test]
    fn appdata_hex_to_cid_uses_input_as_digest() {
        // The appDataHex is already a keccak256; it must become the CID digest
        // verbatim (no extra hashing), matching the TypeScript SDK.
        let cid = appdata_hex_to_cid(SAMPLE_HEX).unwrap();
        let components = parse_cid(&cid).unwrap();
        let expected = alloy_primitives::hex::decode(SAMPLE_HEX.trim_start_matches("0x")).unwrap();
        assert_eq!(components.digest, expected);
    }

    #[test]
    fn appdata_hex_to_cid_rejects_wrong_length() {
        assert!(appdata_hex_to_cid("0xdeadbeef").is_err());
    }

    #[test]
    fn cid_to_appdata_hex_rejects_unsupported_multibase() {
        // base58btc (prefix 'Q') is not supported.
        assert!(cid_to_appdata_hex("Qmabc123").is_err());
        // 'z' multibase is not supported.
        assert!(cid_to_appdata_hex("zabc123").is_err());
    }

    #[test]
    fn cid_to_appdata_hex_rejects_empty() {
        assert!(cid_to_appdata_hex("").is_err());
    }

    #[test]
    fn cid_to_appdata_hex_decodes_uppercase_base32() {
        // Multibase 'B' (uppercase base32) must decode identically to 'b'.
        let cid = "BAFKREIEAQHRRDSFTXWXASEAYMNEMP7F7V6PY6PSBH7NQCNPM5RQHMESVXI";
        let expected = "0x8081e311c8b3bdae0910186348c7fcbfaf9f8f3e413fdb0135ecec60761255ba";
        assert_eq!(cid_to_appdata_hex(cid).unwrap(), expected);
    }

    #[test]
    fn cid_to_appdata_hex_rejects_invalid_base32_char() {
        // '1' is not in the RFC 4648 base32 alphabet.
        let err = cid_to_appdata_hex("b1xyz").unwrap_err();
        assert!(format!("{err}").contains("invalid base32 character"));
    }

    #[test]
    fn cid_to_appdata_hex_rejects_non_canonical_base32() {
        // "az" yields 1 decoded byte with 2 trailing bits equal to 0b01;
        // the decoder must reject this as non-canonical rather than silently
        // truncating.
        let err = cid_to_appdata_hex("baz").unwrap_err();
        assert!(format!("{err}").contains("non-canonical base32"));
    }

    #[test]
    fn cid_to_appdata_hex_decodes_base32_vector() {
        // Parity vector produced by @cowprotocol/app-data's `cidToAppDataHex`.
        // Multibase base32 lowercase (prefix 'b'), RFC 4648 unpadded — the
        // default encoding emitted by `multiformats`' `CID.toString()` for
        // CIDv1 when no explicit base is chosen.
        let cid = "bafkreieaqhrrdsftxwxaseaymnemp7f7v6py6psbh7nqcnpm5rqhmesvxi";
        let expected = "0x8081e311c8b3bdae0910186348c7fcbfaf9f8f3e413fdb0135ecec60761255ba";
        assert_eq!(cid_to_appdata_hex(cid).unwrap(), expected);
    }

    #[test]
    fn cid_to_appdata_hex_rejects_too_short() {
        assert!(cid_to_appdata_hex("f0155").is_err());
    }

    #[test]
    fn parse_cid_components() {
        let cid = appdata_hex_to_cid(SAMPLE_HEX).expect("SAMPLE_HEX is a valid 32-byte digest");
        let c = parse_cid(&cid).expect("round-tripped CID is valid");
        assert_eq!(c.version, CID_VERSION);
        assert_eq!(c.codec, MULTICODEC_RAW);
        assert_eq!(c.hash_function, HASH_KECCAK256);
        assert_eq!(c.hash_length, HASH_LEN);
        assert_eq!(c.digest.len(), 32);
    }

    #[test]
    fn parse_cid_rejects_unsupported_multibase() {
        // '_' is not a multibase prefix we support.
        assert!(parse_cid("_not_a_cid").is_err());
        // base58btc CIDv0 (leading 'Q') is rejected.
        assert!(parse_cid("QmSomething").is_err());
    }

    #[test]
    fn parse_cid_base32_components() {
        // This vector comes from the TypeScript SDK parity suite. It is a
        // `raw` CIDv1 but hashed with sha2-256 (0x12), not keccak, so we
        // assert the actual header the decoder must produce rather than the
        // keccak-specific constants used elsewhere in the module.
        let cid = "bafkreieaqhrrdsftxwxaseaymnemp7f7v6py6psbh7nqcnpm5rqhmesvxi";
        let c = parse_cid(cid).unwrap();
        assert_eq!(c.version, CID_VERSION);
        assert_eq!(c.codec, MULTICODEC_RAW);
        assert_eq!(c.hash_function, HASH_SHA2_256);
        assert_eq!(c.hash_length, HASH_LEN);
        assert_eq!(c.digest.len(), 32);
    }

    #[test]
    fn parse_cid_rejects_too_short() {
        assert!(parse_cid("f01").is_err());
    }

    #[test]
    fn decode_cid_from_bytes() {
        let mut bytes = vec![0x01, 0x55, 0x1b, 0x20];
        bytes.extend_from_slice(&[0xaa; 32]);
        let c = decode_cid(&bytes).expect("hand-crafted CID bytes are valid");
        assert_eq!(c.version, 1);
        assert_eq!(c.codec, 0x55);
        assert_eq!(c.digest.len(), 32);
    }

    #[test]
    fn decode_cid_rejects_short_bytes() {
        assert!(decode_cid(&[0x01, 0x02, 0x03]).is_err());
        assert!(decode_cid(&[]).is_err());
    }

    #[test]
    fn extract_digest_returns_0x_prefixed() {
        let cid = appdata_hex_to_cid(SAMPLE_HEX).expect("SAMPLE_HEX is a valid 32-byte digest");
        let digest = extract_digest(&cid).expect("round-tripped CID is valid");
        assert!(digest.starts_with("0x"));
        assert_eq!(digest.len(), 66);
    }

    #[test]
    fn assert_cid_accepts_nonempty() {
        assert!(assert_cid("f01234", "0xabc").is_ok());
    }

    #[test]
    fn assert_cid_rejects_empty() {
        assert!(assert_cid("", "0xabc").is_err());
    }

    #[test]
    #[allow(deprecated, reason = "testing legacy API surface")]
    fn legacy_cid_produces_base16_string() {
        let cid =
            app_data_hex_to_cid_legacy(SAMPLE_HEX).expect("SAMPLE_HEX is a valid 32-byte digest");
        assert!(cid.starts_with('f'));
    }

    #[test]
    fn appdata_hex_to_cid_invalid_hex() {
        assert!(appdata_hex_to_cid("0xZZZZ").is_err());
    }

    #[test]
    fn deterministic_output() {
        // Both conversions must succeed: comparing two defaulted (empty)
        // strings would pass even if the function always failed.
        let cid1 = appdata_hex_to_cid(SAMPLE_HEX).expect("first conversion succeeds");
        let cid2 = appdata_hex_to_cid(SAMPLE_HEX).expect("second conversion succeeds");
        assert_eq!(cid1, cid2);
    }

    #[test]
    fn cid_to_appdata_hex_invalid_hex() {
        assert!(cid_to_appdata_hex("fZZZZinvalid").is_err());
    }

    #[test]
    fn parse_cid_uppercase_f_prefix() {
        let cid = appdata_hex_to_cid(SAMPLE_HEX).unwrap();
        // Replace lowercase 'f' prefix with uppercase 'F'
        let upper = format!("F{}", &cid[1..]);
        let c = parse_cid(&upper).unwrap();
        assert_eq!(c.version, CID_VERSION);
    }

    #[test]
    fn to_cid_bytes_without_0x() {
        let hex = SAMPLE_HEX.strip_prefix("0x").unwrap();
        let bytes = to_cid_bytes(CID_VERSION, MULTICODEC_RAW, HASH_KECCAK256, HASH_LEN, hex);
        assert!(bytes.is_ok());
    }

    #[test]
    fn to_cid_bytes_invalid_hex() {
        let result = to_cid_bytes(CID_VERSION, MULTICODEC_RAW, HASH_KECCAK256, HASH_LEN, "ZZZZ");
        assert!(result.is_err());
    }
}