pub mod cdc;
pub mod codec;
pub mod conformance;
pub(crate) mod dct;
pub mod minhash;
pub mod simhash;
pub mod streaming;
pub mod types;
pub mod utils;
pub(crate) mod wtahash;
pub use cdc::alg_cdc_chunks;
pub use codec::encode_base64;
pub use codec::iscc_decompose;
pub use conformance::conformance_selftest;
pub use minhash::alg_minhash_256;
pub use simhash::{alg_simhash, sliding_window};
pub use streaming::{DataHasher, InstanceHasher};
pub use types::*;
#[cfg(feature = "text-processing")]
pub use utils::{text_clean, text_collapse};
pub use utils::{text_remove_newlines, text_trim};
/// Length limit handed to `utils::text_trim` for the normalized `name` in `gen_meta_code_v0`.
#[cfg(feature = "meta-code")]
pub const META_TRIM_NAME: usize = 128;
/// Length limit handed to `utils::text_trim` for the cleaned `description` in `gen_meta_code_v0`.
#[cfg(feature = "meta-code")]
pub const META_TRIM_DESCRIPTION: usize = 4096;
/// Maximum size in bytes of the decoded `meta` payload accepted by `gen_meta_code_v0`.
#[cfg(feature = "meta-code")]
pub const META_TRIM_META: usize = 128_000;
/// Size in bytes (4 MiB) of the read buffer allocated by `gen_sum_code_v0`.
pub const IO_READ_SIZE: usize = 4_194_304;
/// Window width passed to `simhash::sliding_window_strs` by `soft_hash_text_v0`.
pub const TEXT_NGRAM_SIZE: usize = 13;
/// Error type returned by the fallible functions in this module.
#[derive(Debug, thiserror::Error)]
pub enum IsccError {
/// The supplied input could not be processed; the string describes the problem.
#[error("invalid input: {0}")]
InvalidInput(String),
}
/// Shorthand for a `Result` whose error type is [`IsccError`].
pub type IsccResult<T> = Result<T, IsccError>;
#[cfg(feature = "meta-code")]
/// Interleaves the first 16 bytes of `a` and `b` in 4-byte groups.
///
/// The 32-byte result is laid out as `a[0..4] b[0..4] a[4..8] b[4..8] …`.
///
/// # Panics
///
/// Panics if either slice is shorter than 16 bytes.
fn interleave_digests(a: &[u8], b: &[u8]) -> Vec<u8> {
    let mut mixed = Vec::with_capacity(32);
    for (from_a, from_b) in a[..16].chunks_exact(4).zip(b[..16].chunks_exact(4)) {
        mixed.extend_from_slice(from_a);
        mixed.extend_from_slice(from_b);
    }
    mixed
}
/// Computes the similarity hash of a name string.
///
/// The name is collapsed with `utils::text_collapse`, cut into width-3
/// windows, each window is hashed with BLAKE3, and the digests are combined
/// by `simhash::alg_simhash_inner`.
#[cfg(feature = "meta-code")]
fn meta_name_simhash(name: &str) -> Vec<u8> {
    let collapsed = utils::text_collapse(name);
    let trigrams = simhash::sliding_window_strs(&collapsed, 3);
    let mut digests: Vec<[u8; 32]> = Vec::new();
    for trigram in trigrams.iter() {
        digests.push(*blake3::hash(trigram.as_bytes()).as_bytes());
    }
    simhash::alg_simhash_inner(&digests)
}
/// Computes the Meta-Code digest from a name and optional extra text.
///
/// When `extra` is `None` or empty the name similarity hash is returned
/// unchanged. Otherwise the extra text is hashed the same way (collapse,
/// width-3 windows, BLAKE3, simhash) and interleaved with the name hash.
#[cfg(feature = "meta-code")]
fn soft_hash_meta_v0(name: &str, extra: Option<&str>) -> Vec<u8> {
    let name_digest = meta_name_simhash(name);
    let extra_text = match extra {
        Some(text) if !text.is_empty() => text,
        _ => return name_digest,
    };

    let collapsed = utils::text_collapse(extra_text);
    let trigrams = simhash::sliding_window_strs(&collapsed, 3);
    let mut digests: Vec<[u8; 32]> = Vec::new();
    for trigram in trigrams.iter() {
        digests.push(*blake3::hash(trigram.as_bytes()).as_bytes());
    }
    let extra_digest = simhash::alg_simhash_inner(&digests);
    interleave_digests(&name_digest, &extra_digest)
}
/// Computes the Meta-Code digest from a name and a raw byte payload.
///
/// An empty payload yields the name similarity hash alone. Otherwise the
/// payload is cut into width-4 byte windows, each window is hashed with
/// BLAKE3, and the resulting simhash is interleaved with the name hash.
#[cfg(feature = "meta-code")]
fn soft_hash_meta_v0_with_bytes(name: &str, extra: &[u8]) -> Vec<u8> {
    let name_digest = meta_name_simhash(name);
    if !extra.is_empty() {
        let windows = simhash::sliding_window_bytes(extra, 4);
        let mut digests: Vec<[u8; 32]> = Vec::new();
        for window in windows.iter() {
            digests.push(*blake3::hash(window).as_bytes());
        }
        let payload_digest = simhash::alg_simhash_inner(&digests);
        interleave_digests(&name_digest, &payload_digest)
    } else {
        name_digest
    }
}
/// Decodes the base64 payload of a Data-URL.
///
/// Everything after the first comma is treated as standard base64.
///
/// # Errors
///
/// Returns `IsccError::InvalidInput` if the URL has no comma or the payload
/// is not valid base64.
#[cfg(feature = "meta-code")]
fn decode_data_url(data_url: &str) -> IsccResult<Vec<u8>> {
    let encoded = match data_url.split_once(',') {
        Some((_header, body)) => body,
        None => {
            return Err(IsccError::InvalidInput(
                "Data-URL missing comma separator".into(),
            ));
        }
    };
    match data_encoding::BASE64.decode(encoded.as_bytes()) {
        Ok(bytes) => Ok(bytes),
        Err(e) => Err(IsccError::InvalidInput(format!(
            "invalid base64 in Data-URL: {e}"
        ))),
    }
}
/// Parses a JSON string and returns its canonical byte serialization.
///
/// Canonicalization is delegated to `serde_json_canonicalizer::to_writer`.
///
/// # Errors
///
/// Returns `IsccError::InvalidInput` if parsing or canonicalization fails.
#[cfg(feature = "meta-code")]
fn parse_meta_json(meta_str: &str) -> IsccResult<Vec<u8>> {
    let value = match serde_json::from_str::<serde_json::Value>(meta_str) {
        Ok(value) => value,
        Err(e) => {
            return Err(IsccError::InvalidInput(format!(
                "invalid JSON in meta: {e}"
            )));
        }
    };
    let mut canonical = Vec::new();
    if let Err(e) = serde_json_canonicalizer::to_writer(&value, &mut canonical) {
        return Err(IsccError::InvalidInput(format!(
            "JSON canonicalization failed: {e}"
        )));
    }
    Ok(canonical)
}
/// Wraps JSON bytes in a base64 Data-URL.
///
/// The media type is `application/ld+json` when `json_value` has an
/// `@context` entry and `application/json` otherwise.
#[cfg(feature = "meta-code")]
fn build_meta_data_url(json_bytes: &[u8], json_value: &serde_json::Value) -> String {
    let media_type = match json_value.get("@context") {
        Some(_) => "application/ld+json",
        None => "application/json",
    };
    let payload = data_encoding::BASE64.encode(json_bytes);
    ["data:", media_type, ";base64,", payload.as_str()].concat()
}
/// Encodes a digest as an ISCC unit from raw numeric header fields.
///
/// The numeric `mtype`, `stype` and `version` are converted to their codec
/// enums before delegating to `codec::encode_component`.
///
/// # Errors
///
/// Returns an error if a header field cannot be converted, if `digest` holds
/// fewer than `bit_length / 8` bytes, or if the codec rejects the input.
pub fn encode_component(
    mtype: u8,
    stype: u8,
    version: u8,
    bit_length: u32,
    digest: &[u8],
) -> IsccResult<String> {
    let main_type = codec::MainType::try_from(mtype)?;
    let sub_type = codec::SubType::try_from(stype)?;
    let code_version = codec::Version::try_from(version)?;

    let needed = (bit_length / 8) as usize;
    if digest.len() >= needed {
        codec::encode_component(main_type, sub_type, code_version, bit_length, digest)
    } else {
        Err(IsccError::InvalidInput(format!(
            "digest length {} < bit_length/8 ({})",
            digest.len(),
            needed
        )))
    }
}
/// Decodes an ISCC string into its header fields and digest.
///
/// An optional `ISCC:` prefix and any `-` characters are removed before
/// base32 decoding. The result is
/// `(main type, sub type, version, length index, digest)`, with the digest
/// cut to the byte length implied by the header.
///
/// # Errors
///
/// Returns an error if decoding fails or the body is shorter than the
/// header announces.
pub fn iscc_decode(iscc: &str) -> IsccResult<(u8, u8, u8, u8, Vec<u8>)> {
    let without_prefix = iscc.strip_prefix("ISCC:").unwrap_or(iscc);
    let compact: String = without_prefix.chars().filter(|&ch| ch != '-').collect();

    let raw = codec::decode_base32(&compact)?;
    let (mt, st, vs, length_index, tail) = codec::decode_header(&raw)?;
    let nbytes = (codec::decode_length(mt, length_index, st) / 8) as usize;

    match tail.get(..nbytes) {
        Some(digest) => Ok((
            mt as u8,
            st as u8,
            vs as u8,
            length_index as u8,
            digest.to_vec(),
        )),
        None => Err(IsccError::InvalidInput(format!(
            "decoded body too short: expected {nbytes} digest bytes, got {}",
            tail.len()
        ))),
    }
}
/// Converts a JSON string into a base64 Data-URL of its canonical form.
///
/// # Errors
///
/// Returns `IsccError::InvalidInput` if the string is not valid JSON or
/// cannot be canonicalized.
#[cfg(feature = "meta-code")]
pub fn json_to_data_url(json: &str) -> IsccResult<String> {
    let value = match serde_json::from_str::<serde_json::Value>(json) {
        Ok(value) => value,
        Err(e) => return Err(IsccError::InvalidInput(format!("invalid JSON: {e}"))),
    };
    let mut canonical = Vec::new();
    if let Err(e) = serde_json_canonicalizer::to_writer(&value, &mut canonical) {
        return Err(IsccError::InvalidInput(format!(
            "JSON canonicalization failed: {e}"
        )));
    }
    Ok(build_meta_data_url(&canonical, &value))
}
/// Generates a Meta-Code from a name plus an optional description or `meta`.
///
/// * `name` is cleaned, stripped of newlines and trimmed to `META_TRIM_NAME`.
/// * `description` is cleaned and trimmed to `META_TRIM_DESCRIPTION`.
/// * `meta`, when given, is either a `data:` URL (base64 payload) or a JSON
///   string that is canonicalized. It then replaces the description as the
///   second hashing input and is returned as a Data-URL.
///
/// # Errors
///
/// Returns `IsccError::InvalidInput` if the normalized name is empty, if
/// `meta` is too large before or after decoding, if `meta` cannot be decoded
/// or parsed, or if the component cannot be encoded for `bits`.
#[cfg(feature = "meta-code")]
pub fn gen_meta_code_v0(
    name: &str,
    description: Option<&str>,
    meta: Option<&str>,
    bits: u32,
) -> IsccResult<MetaCodeResult> {
    // Cheap bound on the undecoded string, checked before any decoding work.
    // NOTE(review): presumably 4/3 covers base64 expansion and 256 the
    // Data-URL header — confirm.
    const PRE_DECODE_LIMIT: usize = META_TRIM_META * 4 / 3 + 256;

    let name = utils::text_trim(
        &utils::text_remove_newlines(&utils::text_clean(name)),
        META_TRIM_NAME,
    );
    if name.is_empty() {
        return Err(IsccError::InvalidInput(
            "name is empty after normalization".into(),
        ));
    }

    let desc_clean = utils::text_trim(
        &utils::text_clean(description.unwrap_or("")),
        META_TRIM_DESCRIPTION,
    );
    // An empty description is reported as absent.
    let description = if desc_clean.is_empty() {
        None
    } else {
        Some(desc_clean)
    };

    let meta_str = match meta {
        Some(meta_str) if meta_str.len() > PRE_DECODE_LIMIT => {
            return Err(IsccError::InvalidInput(format!(
                "meta string exceeds size limit ({} > {PRE_DECODE_LIMIT} bytes)",
                meta_str.len()
            )));
        }
        Some(meta_str) => meta_str,
        None => {
            // Text-only path: hash the name together with the description.
            let text = match description.as_deref() {
                Some(desc_clean) => format!("{name} {desc_clean}"),
                None => name.clone(),
            };
            let metahash = utils::multi_hash_blake3(text.trim().as_bytes());
            let meta_code_digest = soft_hash_meta_v0(&name, description.as_deref());
            let meta_code = codec::encode_component(
                codec::MainType::Meta,
                codec::SubType::None,
                codec::Version::V0,
                bits,
                &meta_code_digest,
            )?;
            return Ok(MetaCodeResult {
                iscc: format!("ISCC:{meta_code}"),
                name,
                description,
                meta: None,
                metahash,
            });
        }
    };

    // Meta path: the decoded payload is the second hashing input.
    let is_data_url = meta_str.starts_with("data:");
    let payload = if is_data_url {
        decode_data_url(meta_str)?
    } else {
        parse_meta_json(meta_str)?
    };
    if payload.len() > META_TRIM_META {
        return Err(IsccError::InvalidInput(format!(
            "decoded meta payload exceeds size limit ({} > {META_TRIM_META} bytes)",
            payload.len()
        )));
    }

    let meta_code_digest = soft_hash_meta_v0_with_bytes(&name, &payload);
    let metahash = utils::multi_hash_blake3(&payload);
    let meta_code = codec::encode_component(
        codec::MainType::Meta,
        codec::SubType::None,
        codec::Version::V0,
        bits,
        &meta_code_digest,
    )?;

    // Data-URLs are echoed verbatim; JSON is re-emitted as a Data-URL of its
    // canonical bytes.
    let meta_value = if is_data_url {
        meta_str.to_string()
    } else {
        let parsed: serde_json::Value = serde_json::from_str(meta_str)
            .map_err(|e| IsccError::InvalidInput(format!("invalid JSON: {e}")))?;
        build_meta_data_url(&payload, &parsed)
    };

    Ok(MetaCodeResult {
        iscc: format!("ISCC:{meta_code}"),
        name,
        description,
        meta: Some(meta_value),
        metahash,
    })
}
/// Computes the text similarity digest.
///
/// The text is cut into windows of `TEXT_NGRAM_SIZE`, each window is hashed
/// with 32-bit xxHash (seed 0), and the features are reduced by
/// `minhash::alg_minhash_256`.
#[cfg(feature = "text-processing")]
fn soft_hash_text_v0(text: &str) -> Vec<u8> {
    let windows = simhash::sliding_window_strs(text, TEXT_NGRAM_SIZE);
    let mut features: Vec<u32> = Vec::new();
    for window in windows.iter() {
        features.push(xxhash_rust::xxh32::xxh32(window.as_bytes(), 0));
    }
    minhash::alg_minhash_256(&features)
}
/// Generates a Text-Code for `text`.
///
/// The text is collapsed with `utils::text_collapse` before hashing.
/// `characters` in the result is the number of `char`s in the collapsed text.
///
/// # Errors
///
/// Propagates any error from `codec::encode_component`.
#[cfg(feature = "text-processing")]
pub fn gen_text_code_v0(text: &str, bits: u32) -> IsccResult<TextCodeResult> {
    let normalized = utils::text_collapse(text);
    let digest = soft_hash_text_v0(&normalized);
    codec::encode_component(
        codec::MainType::Content,
        codec::SubType::TEXT,
        codec::Version::V0,
        bits,
        &digest,
    )
    .map(|component| TextCodeResult {
        iscc: format!("ISCC:{component}"),
        characters: normalized.chars().count(),
    })
}
/// Returns the transpose of a row-major matrix.
///
/// The output has as many rows as the first input row has columns. An empty
/// input yields an empty output.
fn transpose_matrix(matrix: &[Vec<f64>]) -> Vec<Vec<f64>> {
    let width = match matrix.first() {
        Some(first_row) => first_row.len(),
        None => return Vec::new(),
    };
    let mut transposed = vec![vec![0.0f64; matrix.len()]; width];
    matrix.iter().enumerate().for_each(|(r, row)| {
        row.iter()
            .enumerate()
            .for_each(|(c, &value)| transposed[c][r] = value);
    });
    transposed
}
/// Flattens a block of up to 8×8 values from `matrix` in row-major order.
///
/// The block starts at row index `row` and column index `col`. Rows or
/// columns beyond the matrix bounds are simply absent from the output.
fn flatten_8x8(matrix: &[Vec<f64>], col: usize, row: usize) -> Vec<f64> {
    let mut block = Vec::with_capacity(64);
    block.extend(
        matrix
            .iter()
            .skip(row)
            .take(8)
            .flat_map(|line| line.iter().skip(col).take(8).copied()),
    );
    block
}
/// Returns the median of `values`.
///
/// For an even number of values the mean of the two middle values is
/// returned. An empty slice yields `f64::NAN` instead of panicking.
///
/// Values are ordered with `f64::total_cmp`, so NaN inputs cannot cause a
/// panic during sorting.
fn compute_median(values: &[f64]) -> f64 {
    if values.is_empty() {
        return f64::NAN;
    }
    let mut sorted = values.to_vec();
    // Total ordering: well-defined for every f64, including NaN and ±0.0.
    sorted.sort_unstable_by(f64::total_cmp);
    let mid = sorted.len() / 2;
    if sorted.len() % 2 == 1 {
        sorted[mid]
    } else {
        (sorted[mid - 1] + sorted[mid]) / 2.0
    }
}
/// Packs booleans into bytes, most significant bit first.
///
/// A final group of fewer than eight bits is left-aligned in its byte, with
/// the unused low bits set to zero.
fn bits_to_bytes(bits: &[bool]) -> Vec<u8> {
    bits.chunks(8)
        .map(|group| {
            group
                .iter()
                .enumerate()
                .fold(0u8, |acc, (pos, &set)| acc | (u8::from(set) << (7 - pos)))
        })
        .collect()
}
/// Computes the image similarity digest from 1024 pixel values.
///
/// The pixels are read as 32 rows of 32 values. `dct::alg_dct` is applied to
/// every row and then to every column. Up to four 8×8 blocks of the result
/// are compared against their own median to produce one bit per value, and
/// the first `bits` bits are packed into bytes.
///
/// # Errors
///
/// Returns `IsccError::InvalidInput` if `pixels` does not hold exactly 1024
/// values or `bits` exceeds 256, and propagates errors from `dct::alg_dct`.
fn soft_hash_image_v0(pixels: &[u8], bits: u32) -> IsccResult<Vec<u8>> {
    if pixels.len() != 1024 {
        return Err(IsccError::InvalidInput(format!(
            "expected 1024 pixels, got {}",
            pixels.len()
        )));
    }
    if bits > 256 {
        return Err(IsccError::InvalidInput(format!(
            "bits must be <= 256, got {bits}"
        )));
    }

    // Row pass.
    let mut row_dct: Vec<Vec<f64>> = Vec::with_capacity(32);
    for row in pixels.chunks(32) {
        let samples: Vec<f64> = row.iter().map(|&p| f64::from(p)).collect();
        row_dct.push(dct::alg_dct(&samples)?);
    }

    // Column pass: transpose, transform, transpose back.
    let columns = transpose_matrix(&row_dct);
    let mut col_dct: Vec<Vec<f64>> = Vec::with_capacity(columns.len());
    for column in &columns {
        col_dct.push(dct::alg_dct(column)?);
    }
    let dct_matrix = transpose_matrix(&col_dct);

    let wanted = bits as usize;
    let mut bitstring: Vec<bool> = Vec::with_capacity(256);
    for (col, row) in [(0, 0), (1, 0), (0, 1), (1, 1)] {
        let block = flatten_8x8(&dct_matrix, col, row);
        let median = compute_median(&block);
        bitstring.extend(block.iter().map(|&value| value > median));
        // Stop once enough bits have been collected.
        if bitstring.len() >= wanted {
            break;
        }
    }
    Ok(bits_to_bytes(&bitstring[..wanted]))
}
/// Generates an Image-Code from 1024 pixel values.
///
/// # Errors
///
/// Propagates errors from `soft_hash_image_v0` and `codec::encode_component`.
pub fn gen_image_code_v0(pixels: &[u8], bits: u32) -> IsccResult<ImageCodeResult> {
    let digest = soft_hash_image_v0(pixels, bits)?;
    codec::encode_component(
        codec::MainType::Content,
        codec::SubType::Image,
        codec::Version::V0,
        bits,
        &digest,
    )
    .map(|component| ImageCodeResult {
        iscc: format!("ISCC:{component}"),
    })
}
/// Splits `slice` into `n` consecutive parts of nearly equal length.
///
/// The first `len % n` parts hold one extra element. Parts may be empty when
/// `n` exceeds the slice length. `n == 0` yields an empty vector.
fn array_split<T>(slice: &[T], n: usize) -> Vec<&[T]> {
    if n == 0 {
        return Vec::new();
    }
    let (quotient, extra) = (slice.len() / n, slice.len() % n);
    let mut rest = slice;
    (0..n)
        .map(|index| {
            let width = if index < extra { quotient + 1 } else { quotient };
            let (head, tail) = rest.split_at(width);
            rest = tail;
            head
        })
        .collect()
}
/// Computes the audio similarity digest from a vector of `i32` features.
///
/// The output concatenates the simhash of all features, the simhash of each
/// quarter of the features in input order, and the simhash of each third of
/// the features in ascending order. An empty segment contributes four zero
/// bytes. Empty input yields 32 zero bytes.
fn soft_hash_audio_v0(cv: &[i32]) -> Vec<u8> {
    if cv.is_empty() {
        return vec![0u8; 32];
    }

    let digests: Vec<[u8; 4]> = cv.iter().map(|value| value.to_be_bytes()).collect();
    let mut parts: Vec<u8> = simhash::alg_simhash_inner(&digests);

    for quarter in array_split(&digests, 4) {
        if quarter.is_empty() {
            parts.extend_from_slice(&[0u8; 4]);
        } else {
            parts.extend_from_slice(&simhash::alg_simhash_inner(quarter));
        }
    }

    let mut ascending: Vec<i32> = cv.to_vec();
    ascending.sort_unstable();
    let ascending_digests: Vec<[u8; 4]> =
        ascending.iter().map(|value| value.to_be_bytes()).collect();
    for third in array_split(&ascending_digests, 3) {
        if third.is_empty() {
            parts.extend_from_slice(&[0u8; 4]);
        } else {
            parts.extend_from_slice(&simhash::alg_simhash_inner(third));
        }
    }

    parts
}
/// Generates an Audio-Code from a vector of `i32` features.
///
/// # Errors
///
/// Propagates any error from `codec::encode_component`.
pub fn gen_audio_code_v0(cv: &[i32], bits: u32) -> IsccResult<AudioCodeResult> {
    let digest = soft_hash_audio_v0(cv);
    codec::encode_component(
        codec::MainType::Content,
        codec::SubType::Audio,
        codec::Version::V0,
        bits,
        &digest,
    )
    .map(|component| AudioCodeResult {
        iscc: format!("ISCC:{component}"),
    })
}
/// Computes the video similarity digest from per-frame signatures.
///
/// Duplicate signatures are dropped, the remaining ones are summed column by
/// column into `i64` accumulators, and the sums are passed to
/// `wtahash::alg_wtahash`. The number of columns is taken from the first
/// signature. A signature shorter than the first contributes only to the
/// columns it covers.
///
/// # Errors
///
/// Returns `IsccError::InvalidInput` if `frame_sigs` is empty or if a
/// signature is longer than the first one (this used to panic with an
/// out-of-bounds index). Errors from `wtahash::alg_wtahash` are propagated.
pub fn soft_hash_video_v0<S: AsRef<[i32]> + Ord>(
    frame_sigs: &[S],
    bits: u32,
) -> IsccResult<Vec<u8>> {
    let cols = match frame_sigs.first() {
        Some(first) => first.as_ref().len(),
        None => {
            return Err(IsccError::InvalidInput(
                "frame_sigs must not be empty".into(),
            ));
        }
    };

    // The ordered set removes duplicate frame signatures.
    let unique: std::collections::BTreeSet<&S> = frame_sigs.iter().collect();

    let mut vecsum = vec![0i64; cols];
    for sig in &unique {
        let values: &[i32] = sig.as_ref();
        if values.len() > cols {
            return Err(IsccError::InvalidInput(format!(
                "frame signature has {} values but the first signature has {cols}",
                values.len()
            )));
        }
        // Widen to i64 so the column sums do not overflow i32.
        for (total, &val) in vecsum.iter_mut().zip(values) {
            *total += i64::from(val);
        }
    }
    wtahash::alg_wtahash(&vecsum, bits)
}
/// Generates a Video-Code from per-frame signatures.
///
/// # Errors
///
/// Propagates errors from `soft_hash_video_v0` and `codec::encode_component`.
pub fn gen_video_code_v0<S: AsRef<[i32]> + Ord>(
    frame_sigs: &[S],
    bits: u32,
) -> IsccResult<VideoCodeResult> {
    let video_digest = soft_hash_video_v0(frame_sigs, bits)?;
    codec::encode_component(
        codec::MainType::Content,
        codec::SubType::Video,
        codec::Version::V0,
        bits,
        &video_digest,
    )
    .map(|component| VideoCodeResult {
        iscc: format!("ISCC:{component}"),
    })
}
/// Computes the similarity digest over several decoded Content-Codes.
///
/// Each entry of `cc_digests` is the raw (base32-decoded) bytes of one code.
/// For every code, the first raw byte is kept and followed by up to
/// `bits / 8 - 1` body bytes, zero-padded to `bits / 8` bytes in total. The
/// prepared entries are combined with `simhash::alg_simhash_inner`.
///
/// # Errors
///
/// Returns `IsccError::InvalidInput` if fewer than two codes are given, if
/// `bits` is below 8 (this used to underflow `bits / 8 - 1`), if a code is
/// not a Content-Code, or if a code is shorter than `bits`. Header decoding
/// errors are propagated.
fn soft_hash_codes_v0(cc_digests: &[Vec<u8>], bits: u32) -> IsccResult<Vec<u8>> {
    if cc_digests.len() < 2 {
        return Err(IsccError::InvalidInput(
            "at least 2 Content-Codes required for mixing".into(),
        ));
    }
    let nbytes = (bits / 8) as usize;
    if nbytes == 0 {
        return Err(IsccError::InvalidInput(format!(
            "bits must be at least 8, got {bits}"
        )));
    }

    let mut prepared: Vec<Vec<u8>> = Vec::with_capacity(cc_digests.len());
    for raw in cc_digests {
        let (mtype, stype, _ver, blen, body) = codec::decode_header(raw)?;
        if mtype != codec::MainType::Content {
            return Err(IsccError::InvalidInput(
                "all codes must be Content-Codes".into(),
            ));
        }
        let unit_bits = codec::decode_length(mtype, blen, stype);
        if unit_bits < bits {
            return Err(IsccError::InvalidInput(format!(
                "Content-Code too short for {bits}-bit length (has {unit_bits} bits)"
            )));
        }

        let mut entry = Vec::with_capacity(nbytes);
        // NOTE(review): assumes decode_header rejects empty input, so raw[0]
        // exists — confirm.
        entry.push(raw[0]);
        let take = (nbytes - 1).min(body.len());
        entry.extend_from_slice(&body[..take]);
        entry.resize(nbytes, 0);
        prepared.push(entry);
    }
    Ok(simhash::alg_simhash_inner(&prepared))
}
/// Generates a Mixed-Code from several Content-Code strings.
///
/// An optional `ISCC:` prefix is removed from each code before base32
/// decoding. The input strings are echoed unchanged in `parts`.
///
/// # Errors
///
/// Propagates errors from decoding, `soft_hash_codes_v0` and
/// `codec::encode_component`.
pub fn gen_mixed_code_v0(codes: &[&str], bits: u32) -> IsccResult<MixedCodeResult> {
    let mut raw_codes: Vec<Vec<u8>> = Vec::with_capacity(codes.len());
    for code in codes {
        let bare = code.strip_prefix("ISCC:").unwrap_or(code);
        raw_codes.push(codec::decode_base32(bare)?);
    }

    let mixed_digest = soft_hash_codes_v0(&raw_codes, bits)?;
    let component = codec::encode_component(
        codec::MainType::Content,
        codec::SubType::Mixed,
        codec::Version::V0,
        bits,
        &mixed_digest,
    )?;

    let parts: Vec<String> = codes.iter().map(|code| code.to_string()).collect();
    Ok(MixedCodeResult {
        iscc: format!("ISCC:{component}"),
        parts,
    })
}
/// Generates a Data-Code for a byte slice.
///
/// The data is chunked by `cdc::alg_cdc_chunks_unchecked`, every chunk is
/// hashed with 32-bit xxHash (seed 0), and the features are reduced by
/// `minhash::alg_minhash_256`. If chunking yields nothing, the hash of the
/// empty byte string is used as the only feature.
///
/// # Errors
///
/// Propagates any error from `codec::encode_component`.
pub fn gen_data_code_v0(data: &[u8], bits: u32) -> IsccResult<DataCodeResult> {
    let chunks = cdc::alg_cdc_chunks_unchecked(data, false, cdc::DATA_AVG_CHUNK_SIZE);
    let mut features: Vec<u32> = Vec::new();
    for chunk in chunks.iter() {
        features.push(xxhash_rust::xxh32::xxh32(chunk, 0));
    }
    if features.is_empty() {
        features.push(xxhash_rust::xxh32::xxh32(b"", 0));
    }

    let data_digest = minhash::alg_minhash_256(&features);
    codec::encode_component(
        codec::MainType::Data,
        codec::SubType::None,
        codec::Version::V0,
        bits,
        &data_digest,
    )
    .map(|component| DataCodeResult {
        iscc: format!("ISCC:{component}"),
    })
}
/// Generates an Instance-Code for a byte slice.
///
/// The code is built from the BLAKE3 hash of `data`. The result also carries
/// the value of `utils::multi_hash_blake3` as `datahash` and the input
/// length as `filesize`.
///
/// # Errors
///
/// Propagates any error from `codec::encode_component`.
pub fn gen_instance_code_v0(data: &[u8], bits: u32) -> IsccResult<InstanceCodeResult> {
    let hash = blake3::hash(data);
    let component = codec::encode_component(
        codec::MainType::Instance,
        codec::SubType::None,
        codec::Version::V0,
        bits,
        hash.as_bytes(),
    )?;
    Ok(InstanceCodeResult {
        iscc: format!("ISCC:{component}"),
        datahash: utils::multi_hash_blake3(data),
        filesize: data.len() as u64,
    })
}
pub fn gen_iscc_code_v0(codes: &[&str], wide: bool) -> IsccResult<IsccCodeResult> {
let cleaned: Vec<&str> = codes
.iter()
.map(|c| c.strip_prefix("ISCC:").unwrap_or(c))
.collect();
if cleaned.len() < 2 {
return Err(IsccError::InvalidInput(
"at least 2 ISCC unit codes required".into(),
));
}
for code in &cleaned {
if code.len() < 16 {
return Err(IsccError::InvalidInput(format!(
"ISCC unit code too short (min 16 chars): {code}"
)));
}
}
let mut decoded: Vec<(
codec::MainType,
codec::SubType,
codec::Version,
u32,
Vec<u8>,
)> = Vec::with_capacity(cleaned.len());
for code in &cleaned {
let raw = codec::decode_base32(code)?;
let header = codec::decode_header(&raw)?;
decoded.push(header);
}
decoded.sort_by_key(|&(mt, ..)| mt);
let main_types: Vec<codec::MainType> = decoded.iter().map(|&(mt, ..)| mt).collect();
let n = main_types.len();
if main_types[n - 2] != codec::MainType::Data || main_types[n - 1] != codec::MainType::Instance
{
return Err(IsccError::InvalidInput(
"Data-Code and Instance-Code are mandatory".into(),
));
}
let is_wide = wide
&& decoded.len() == 2
&& main_types == [codec::MainType::Data, codec::MainType::Instance]
&& decoded
.iter()
.all(|&(mt, st, _, len, _)| codec::decode_length(mt, len, st) >= 128);
let st = if is_wide {
codec::SubType::Wide
} else {
let sc_subtypes: Vec<codec::SubType> = decoded
.iter()
.filter(|&&(mt, ..)| mt == codec::MainType::Semantic || mt == codec::MainType::Content)
.map(|&(_, st, ..)| st)
.collect();
if !sc_subtypes.is_empty() {
let first = sc_subtypes[0];
if sc_subtypes.iter().all(|&s| s == first) {
first
} else {
return Err(IsccError::InvalidInput(
"mixed SubTypes among Content/Semantic units".into(),
));
}
} else if decoded.len() == 2 {
codec::SubType::Sum
} else {
codec::SubType::IsccNone
}
};
let optional_types = &main_types[..n - 2];
let encoded_length = codec::encode_units(optional_types)?;
let bytes_per_unit = if is_wide { 16 } else { 8 };
let mut digest = Vec::with_capacity(decoded.len() * bytes_per_unit);
for (_, _, _, _, tail) in &decoded {
let take = bytes_per_unit.min(tail.len());
digest.extend_from_slice(&tail[..take]);
}
let header = codec::encode_header(
codec::MainType::Iscc,
st,
codec::Version::V0,
encoded_length,
)?;
let mut code_bytes = header;
code_bytes.extend_from_slice(&digest);
let code = codec::encode_base32(&code_bytes);
Ok(IsccCodeResult {
iscc: format!("ISCC:{code}"),
})
}
/// Generates a composite ISCC-CODE for the file at `path` in one pass.
///
/// The file is read in blocks of `IO_READ_SIZE` bytes that feed a
/// `DataHasher` and an `InstanceHasher` at the same time. The two unit codes
/// are then combined with `gen_iscc_code_v0`. When `add_units` is set, the
/// result also lists the Data-Code and Instance-Code.
///
/// # Errors
///
/// Returns `IsccError::InvalidInput` if the file cannot be opened or read,
/// and propagates errors from the hashers and from `gen_iscc_code_v0`.
pub fn gen_sum_code_v0(
    path: &std::path::Path,
    bits: u32,
    wide: bool,
    add_units: bool,
) -> IsccResult<SumCodeResult> {
    use std::io::Read;

    let mut file = std::fs::File::open(path)
        .map_err(|e| IsccError::InvalidInput(format!("Cannot open file: {e}")))?;
    let mut data_hasher = streaming::DataHasher::new();
    let mut instance_hasher = streaming::InstanceHasher::new();
    let mut buf = vec![0u8; IO_READ_SIZE];

    loop {
        let n = match file.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            // An interrupted read is not a failure; `Read::read` documents
            // that it should be retried.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => {
                return Err(IsccError::InvalidInput(format!(
                    "Cannot read file: {e}"
                )));
            }
        };
        data_hasher.update(&buf[..n]);
        instance_hasher.update(&buf[..n]);
    }

    let data_result = data_hasher.finalize(bits)?;
    let instance_result = instance_hasher.finalize(bits)?;
    let iscc_result = gen_iscc_code_v0(&[&data_result.iscc, &instance_result.iscc], wide)?;

    let units = if add_units {
        Some(vec![data_result.iscc, instance_result.iscc])
    } else {
        None
    };
    Ok(SumCodeResult {
        iscc: iscc_result.iscc,
        datahash: instance_result.datahash,
        filesize: instance_result.filesize,
        units,
    })
}
#[cfg(test)]
mod tests {
use super::*;
/// A bare title yields the pinned Meta-Code and no optional fields.
#[cfg(feature = "meta-code")]
#[test]
fn test_gen_meta_code_v0_title_only() {
    let code = gen_meta_code_v0("Die Unendliche Geschichte", None, None, 64).unwrap();
    assert_eq!(code.iscc, "ISCC:AAAZXZ6OU74YAZIM");
    assert_eq!(code.name, "Die Unendliche Geschichte");
    assert!(code.description.is_none());
    assert!(code.meta.is_none());
}
/// Title plus description yields the pinned Meta-Code and echoes both fields.
#[cfg(feature = "meta-code")]
#[test]
fn test_gen_meta_code_v0_title_description() {
    let title = "Die Unendliche Geschichte";
    let description = "Von Michael Ende";
    let code = gen_meta_code_v0(title, Some(description), None, 64).unwrap();
    assert_eq!(code.iscc, "ISCC:AAAZXZ6OU4E45RB5");
    assert_eq!(code.name, "Die Unendliche Geschichte");
    assert_eq!(code.description.as_deref(), Some("Von Michael Ende"));
    assert!(code.meta.is_none());
}
/// A JSON `meta` string is returned as an `application/json` Data-URL.
#[cfg(feature = "meta-code")]
#[test]
fn test_gen_meta_code_v0_json_meta() {
    let code = gen_meta_code_v0("Hello", None, Some(r#"{"some":"object"}"#), 64).unwrap();
    assert_eq!(code.iscc, "ISCC:AAAWKLHFXN63LHL2");
    let data_url = code.meta.as_deref();
    assert!(data_url.is_some());
    assert!(data_url.unwrap().starts_with("data:application/json;base64,"));
}
/// A Data-URL `meta` is hashed from its payload and echoed verbatim.
#[cfg(feature = "meta-code")]
#[test]
fn test_gen_meta_code_v0_data_url_meta() {
    let data_url = "data:application/json;charset=utf-8;base64,eyJzb21lIjogIm9iamVjdCJ9";
    let code = gen_meta_code_v0("Hello", None, Some(data_url), 64).unwrap();
    assert_eq!(code.iscc, "ISCC:AAAWKLHFXN43ICP2");
    assert_eq!(
        code.meta.as_deref(),
        Some("data:application/json;charset=utf-8;base64,eyJzb21lIjogIm9iamVjdCJ9")
    );
}
/// The float `1.0` in JSON `meta` is canonicalized to `1` before hashing.
#[cfg(feature = "meta-code")]
#[test]
fn test_gen_meta_code_v0_jcs_float_canonicalization() {
    let code = gen_meta_code_v0("Test", None, Some(r#"{"value":1.0}"#), 64).unwrap();
    assert_eq!(
        code.iscc, "ISCC:AAAX4GX3RZH2I6QZ",
        "ISCC mismatch: parse_meta_json must use RFC 8785 (JCS) canonicalization"
    );
    assert_eq!(
        code.meta.as_deref(),
        Some("data:application/json;base64,eyJ2YWx1ZSI6MX0="),
        "meta Data-URL mismatch: JCS should serialize 1.0 as 1"
    );
    assert_eq!(
        code.metahash, "1e2010b291d392b6999ffe4aa4661fb343fc371fca3bfb5bb4e8d8226fdf85743232",
        "metahash mismatch: canonical bytes differ between JCS and serde_json"
    );
}
/// The float `1e20` in JSON `meta` is canonicalized to its integer form.
#[cfg(feature = "meta-code")]
#[test]
fn test_gen_meta_code_v0_jcs_large_float_canonicalization() {
    let code = gen_meta_code_v0("Test", None, Some(r#"{"value":1e20}"#), 64).unwrap();
    assert_eq!(
        code.iscc, "ISCC:AAAX4GX3R32YH5P7",
        "ISCC mismatch: JCS should expand 1e20 to 100000000000000000000"
    );
    assert_eq!(
        code.meta.as_deref(),
        Some("data:application/json;base64,eyJ2YWx1ZSI6MTAwMDAwMDAwMDAwMDAwMDAwMDAwfQ=="),
        "meta Data-URL mismatch: JCS should expand large float to integer form"
    );
    assert_eq!(
        code.metahash, "1e201ff83c1822c348717658a0b4713739646da7c59832691b337a457416ddd1c73d",
        "metahash mismatch: canonical bytes differ for large float"
    );
}
/// `meta` that is neither a Data-URL nor valid JSON is rejected.
#[cfg(feature = "meta-code")]
#[test]
fn test_gen_meta_code_v0_invalid_json() {
    let outcome = gen_meta_code_v0("test", None, Some("not json"), 64);
    assert!(matches!(outcome, Err(IsccError::InvalidInput(_))));
}
/// A Data-URL without a comma separator is rejected.
#[cfg(feature = "meta-code")]
#[test]
fn test_gen_meta_code_v0_invalid_data_url() {
    let outcome = gen_meta_code_v0("test", None, Some("data:no-comma-here"), 64);
    assert!(matches!(outcome, Err(IsccError::InvalidInput(_))));
}
/// Runs every `gen_meta_code_v0` vector from `tests/data.json`.
///
/// Each case supplies `[name, description, meta, bits]` as inputs. `meta` may
/// be null, a string, or an object that is serialized to a JSON string first.
#[cfg(feature = "meta-code")]
#[test]
fn test_gen_meta_code_v0_conformance() {
    let vectors: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = vectors["gen_meta_code_v0"].as_object().unwrap();

    let mut tested = 0;
    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let outputs = &tc["outputs"];

        let input_name = inputs[0].as_str().unwrap();
        // An empty description in the fixture means "no description".
        let desc = Some(inputs[1].as_str().unwrap()).filter(|text| !text.is_empty());
        let bits = inputs[3].as_u64().unwrap() as u32;
        let meta_arg: Option<String> = match &inputs[2] {
            serde_json::Value::Null => None,
            serde_json::Value::String(s) => Some(s.clone()),
            object @ serde_json::Value::Object(_) => {
                Some(serde_json::to_string(object).unwrap())
            }
            other => panic!("unexpected meta type in {tc_name}: {other:?}"),
        };

        let result = gen_meta_code_v0(input_name, desc, meta_arg.as_deref(), bits)
            .unwrap_or_else(|e| panic!("gen_meta_code_v0 failed for {tc_name}: {e}"));

        assert_eq!(
            result.iscc,
            outputs["iscc"].as_str().unwrap(),
            "ISCC mismatch in test case {tc_name}"
        );
        assert_eq!(
            result.metahash,
            outputs["metahash"].as_str().unwrap(),
            "metahash mismatch in test case {tc_name}"
        );
        if let Some(expected_name) = outputs.get("name") {
            assert_eq!(
                result.name,
                expected_name.as_str().unwrap(),
                "name mismatch in test case {tc_name}"
            );
        }
        if let Some(expected_desc) = outputs.get("description") {
            assert_eq!(
                result.description.as_deref(),
                Some(expected_desc.as_str().unwrap()),
                "description mismatch in test case {tc_name}"
            );
        }
        match meta_arg {
            Some(_) => assert!(
                result.meta.is_some(),
                "meta should be present in test case {tc_name}"
            ),
            None => assert!(
                result.meta.is_none(),
                "meta should be absent in test case {tc_name}"
            ),
        }
        tested += 1;
    }
    assert_eq!(tested, 20, "expected 20 conformance tests to run");
}
/// Empty text yields the pinned Text-Code and a character count of zero.
#[cfg(feature = "text-processing")]
#[test]
fn test_gen_text_code_v0_empty() {
    let code = gen_text_code_v0("", 64).unwrap();
    assert_eq!(code.characters, 0);
    assert_eq!(code.iscc, "ISCC:EAASL4F2WZY7KBXB");
}
/// "Hello World" yields the pinned Text-Code and counts 10 characters.
#[cfg(feature = "text-processing")]
#[test]
fn test_gen_text_code_v0_hello_world() {
    let code = gen_text_code_v0("Hello World", 64).unwrap();
    assert_eq!(code.characters, 10);
    assert_eq!(code.iscc, "ISCC:EAASKDNZNYGUUF5A");
}
/// Runs every `gen_text_code_v0` vector from `tests/data.json`.
#[cfg(feature = "text-processing")]
#[test]
fn test_gen_text_code_v0_conformance() {
    let vectors: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = vectors["gen_text_code_v0"].as_object().unwrap();

    let mut tested = 0;
    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let outputs = &tc["outputs"];
        let input_text = inputs[0].as_str().unwrap();
        let bits = inputs[1].as_u64().unwrap() as u32;

        let result = gen_text_code_v0(input_text, bits)
            .unwrap_or_else(|e| panic!("gen_text_code_v0 failed for {tc_name}: {e}"));

        assert_eq!(
            result.iscc,
            outputs["iscc"].as_str().unwrap(),
            "ISCC mismatch in test case {tc_name}"
        );
        assert_eq!(
            result.characters,
            outputs["characters"].as_u64().unwrap() as usize,
            "character count mismatch in test case {tc_name}"
        );
        tested += 1;
    }
    assert_eq!(tested, 5, "expected 5 conformance tests to run");
}
/// An all-zero image yields the pinned 64-bit Image-Code.
#[test]
fn test_gen_image_code_v0_all_black() {
    let black = [0u8; 1024];
    let code = gen_image_code_v0(&black, 64).unwrap();
    assert_eq!(code.iscc, "ISCC:EEAQAAAAAAAAAAAA");
}
/// An all-255 image yields the pinned 128-bit Image-Code.
#[test]
fn test_gen_image_code_v0_all_white() {
    let white = [255u8; 1024];
    let code = gen_image_code_v0(&white, 128).unwrap();
    assert_eq!(code.iscc, "ISCC:EEBYAAAAAAAAAAAAAAAAAAAAAAAAA");
}
/// Input with fewer than 1024 pixels is rejected.
#[test]
fn test_gen_image_code_v0_invalid_pixel_count() {
    let too_few = [0u8; 100];
    assert!(gen_image_code_v0(&too_few, 64).is_err());
}
/// Runs every `gen_image_code_v0` vector from `tests/data.json`.
#[test]
fn test_gen_image_code_v0_conformance() {
    let vectors: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = vectors["gen_image_code_v0"].as_object().unwrap();

    let mut tested = 0;
    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let bits = inputs[1].as_u64().unwrap() as u32;
        let pixels: Vec<u8> = inputs[0]
            .as_array()
            .unwrap()
            .iter()
            .map(|v| v.as_u64().unwrap() as u8)
            .collect();

        let result = gen_image_code_v0(&pixels, bits)
            .unwrap_or_else(|e| panic!("gen_image_code_v0 failed for {tc_name}: {e}"));

        assert_eq!(
            result.iscc,
            tc["outputs"]["iscc"].as_str().unwrap(),
            "ISCC mismatch in test case {tc_name}"
        );
        tested += 1;
    }
    assert_eq!(tested, 3, "expected 3 conformance tests to run");
}
/// An empty feature vector yields the pinned all-zero Audio-Code.
#[test]
fn test_gen_audio_code_v0_empty() {
    let no_features: [i32; 0] = [];
    let code = gen_audio_code_v0(&no_features, 64).unwrap();
    assert_eq!(code.iscc, "ISCC:EIAQAAAAAAAAAAAA");
}
/// A single feature yields the pinned 128-bit Audio-Code.
#[test]
fn test_gen_audio_code_v0_single() {
    let code = gen_audio_code_v0(&[1], 128).unwrap();
    let expected = "ISCC:EIBQAAAAAEAAAAABAAAAAAAAAAAAA";
    assert_eq!(code.iscc, expected);
}
/// Negative features are handled and yield the pinned 256-bit Audio-Code.
#[test]
fn test_gen_audio_code_v0_negative() {
    let features = [-1, 0, 1];
    let code = gen_audio_code_v0(&features, 256).unwrap();
    assert_eq!(
        code.iscc,
        "ISCC:EIDQAAAAAH777777AAAAAAAAAAAACAAAAAAP777774AAAAAAAAAAAAI"
    );
}
/// Runs every `gen_audio_code_v0` vector from `tests/data.json`.
#[test]
fn test_gen_audio_code_v0_conformance() {
    let vectors: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = vectors["gen_audio_code_v0"].as_object().unwrap();

    let mut tested = 0;
    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let bits = inputs[1].as_u64().unwrap() as u32;
        let cv: Vec<i32> = inputs[0]
            .as_array()
            .unwrap()
            .iter()
            .map(|v| v.as_i64().unwrap() as i32)
            .collect();

        let result = gen_audio_code_v0(&cv, bits)
            .unwrap_or_else(|e| panic!("gen_audio_code_v0 failed for {tc_name}: {e}"));

        assert_eq!(
            result.iscc,
            tc["outputs"]["iscc"].as_str().unwrap(),
            "ISCC mismatch in test case {tc_name}"
        );
        tested += 1;
    }
    assert_eq!(tested, 5, "expected 5 conformance tests to run");
}
/// Four elements split four ways give one element per part.
#[test]
fn test_array_split_even() {
    let items = [1, 2, 3, 4];
    let expected: Vec<&[i32]> = vec![&[1], &[2], &[3], &[4]];
    assert_eq!(array_split(&items[..], 4), expected);
}
/// Leftover elements go to the leading parts.
#[test]
fn test_array_split_remainder() {
    let items = [1, 2, 3, 4, 5];
    let expected: Vec<&[i32]> = vec![&[1, 2], &[3, 4], &[5]];
    assert_eq!(array_split(&items[..], 3), expected);
}
/// Asking for more parts than elements pads the tail with empty parts.
#[test]
fn test_array_split_more_parts_than_elements() {
    let items = [1, 2];
    let nothing: &[i32] = &[];
    let expected: Vec<&[i32]> = vec![&[1], &[2], nothing, nothing];
    assert_eq!(array_split(&items[..], 4), expected);
}
/// Splitting an empty slice gives the requested number of empty parts.
#[test]
fn test_array_split_empty() {
    let nothing: &[i32] = &[];
    let expected: Vec<&[i32]> = vec![nothing; 3];
    assert_eq!(array_split(nothing, 3), expected);
}
/// An empty list of frame signatures is rejected.
#[test]
fn test_gen_video_code_v0_empty_frames() {
    let no_frames: Vec<Vec<i32>> = Vec::new();
    let outcome = gen_video_code_v0(&no_frames, 64);
    assert!(matches!(outcome, Err(IsccError::InvalidInput(_))));
}
/// Runs every `gen_video_code_v0` vector from `tests/data.json`.
#[test]
fn test_gen_video_code_v0_conformance() {
    let vectors: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = vectors["gen_video_code_v0"].as_object().unwrap();

    let mut tested = 0;
    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let bits = inputs[1].as_u64().unwrap() as u32;

        let mut frame_sigs: Vec<Vec<i32>> = Vec::new();
        for frame in inputs[0].as_array().unwrap() {
            let signature = frame
                .as_array()
                .unwrap()
                .iter()
                .map(|v| v.as_i64().unwrap() as i32)
                .collect();
            frame_sigs.push(signature);
        }

        let result = gen_video_code_v0(&frame_sigs, bits)
            .unwrap_or_else(|e| panic!("gen_video_code_v0 failed for {tc_name}: {e}"));

        assert_eq!(
            result.iscc,
            tc["outputs"]["iscc"].as_str().unwrap(),
            "ISCC mismatch in test case {tc_name}"
        );
        tested += 1;
    }
    assert_eq!(tested, 3, "expected 3 conformance tests to run");
}
/// Runs every `gen_mixed_code_v0` vector from `tests/data.json`.
#[test]
fn test_gen_mixed_code_v0_conformance() {
    let vectors: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = vectors["gen_mixed_code_v0"].as_object().unwrap();

    // Reads a JSON array of strings as borrowed string slices.
    let as_strs = |value: &serde_json::Value| -> Vec<String> {
        value
            .as_array()
            .unwrap()
            .iter()
            .map(|v| v.as_str().unwrap().to_string())
            .collect()
    };

    let mut tested = 0;
    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let bits = inputs[1].as_u64().unwrap() as u32;
        let owned_codes = as_strs(&inputs[0]);
        let codes: Vec<&str> = owned_codes.iter().map(|s| s.as_str()).collect();
        let expected_parts = as_strs(&tc["outputs"]["parts"]);

        let result = gen_mixed_code_v0(&codes, bits)
            .unwrap_or_else(|e| panic!("gen_mixed_code_v0 failed for {tc_name}: {e}"));

        assert_eq!(
            result.iscc,
            tc["outputs"]["iscc"].as_str().unwrap(),
            "ISCC mismatch in test case {tc_name}"
        );
        assert_eq!(
            result.parts, expected_parts,
            "parts mismatch in test case {tc_name}"
        );
        tested += 1;
    }
    assert_eq!(tested, 2, "expected 2 conformance tests to run");
}
/// A single code is not enough to build a Mixed-Code.
#[test]
fn test_gen_mixed_code_v0_too_few_codes() {
    let outcome = gen_mixed_code_v0(&["EUA6GIKXN42IQV3S"], 64);
    assert!(matches!(outcome, Err(IsccError::InvalidInput(_))));
}
/// Test helper: encodes a Content component of the given subtype and bit
/// length via `codec::encode_component`, then returns the result of
/// `codec::decode_base32` on it.
///
/// The body is `bit_length / 8` bytes counting up from zero (0, 1, 2, …).
fn make_content_code_raw(stype: codec::SubType, bit_length: u32) -> Vec<u8> {
    let body_len = (bit_length / 8) as usize;
    let mut body: Vec<u8> = Vec::with_capacity(body_len);
    for index in 0..body_len {
        body.push((index & 0xFF) as u8);
    }
    let encoded = codec::encode_component(
        codec::MainType::Content,
        stype,
        codec::Version::V0,
        bit_length,
        &body,
    )
    .unwrap();
    codec::decode_base32(&encoded).unwrap()
}
/// A 32-bit code combined at 64 bits must fail with a "too short" error.
#[test]
fn test_soft_hash_codes_v0_rejects_short_code() {
    let codes = [
        make_content_code_raw(codec::SubType::None, 64),
        make_content_code_raw(codec::SubType::Image, 32),
    ];
    let result = soft_hash_codes_v0(&codes, 64);
    let too_short = match &result {
        Err(IsccError::InvalidInput(msg)) => msg.contains("too short"),
        _ => false,
    };
    assert!(
        too_short,
        "expected InvalidInput with 'too short', got {result:?}"
    );
}
/// Codes that are exactly as long as the requested bit length are accepted.
#[test]
fn test_soft_hash_codes_v0_accepts_exact_length() {
    let codes = [
        make_content_code_raw(codec::SubType::None, 64),
        make_content_code_raw(codec::SubType::Image, 64),
    ];
    let result = soft_hash_codes_v0(&codes, 64);
    assert!(result.is_ok(), "expected Ok, got {result:?}");
}
/// Codes longer than the requested bit length are accepted.
#[test]
fn test_soft_hash_codes_v0_accepts_longer_codes() {
    let codes = [
        make_content_code_raw(codec::SubType::None, 128),
        make_content_code_raw(codec::SubType::Audio, 128),
    ];
    let result = soft_hash_codes_v0(&codes, 64);
    assert!(result.is_ok(), "expected Ok, got {result:?}");
}
/// Runs every `gen_data_code_v0` vector from the bundled `data.json`.
///
/// Each input stream is stored as `stream:<hex>` and is hex-decoded before
/// being hashed.
#[test]
fn test_gen_data_code_v0_conformance() {
    let data: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = data["gen_data_code_v0"].as_object().unwrap();
    let mut tested = 0;
    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let stream_str = inputs[0].as_str().unwrap();
        let bits = inputs[1].as_u64().unwrap() as u32;
        let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();

        let hex_data = match stream_str.strip_prefix("stream:") {
            Some(rest) => rest,
            None => panic!("expected 'stream:' prefix in test case {tc_name}"),
        };
        let input_bytes = match hex::decode(hex_data) {
            Ok(bytes) => bytes,
            Err(e) => panic!("invalid hex in test case {tc_name}: {e}"),
        };
        let result = match gen_data_code_v0(&input_bytes, bits) {
            Ok(value) => value,
            Err(e) => panic!("gen_data_code_v0 failed for {tc_name}: {e}"),
        };

        assert_eq!(
            result.iscc, expected_iscc,
            "ISCC mismatch in test case {tc_name}"
        );
        tested += 1;
    }
    assert_eq!(tested, 4, "expected 4 conformance tests to run");
}
/// Pins the ISCC, file size and data hash produced for empty input.
#[test]
fn test_gen_instance_code_v0_empty() {
    let code = gen_instance_code_v0(b"", 64).unwrap();
    assert_eq!(code.iscc, "ISCC:IAA26E2JXH27TING");
    assert_eq!(code.filesize, 0);
    assert_eq!(
        code.datahash,
        "1e20af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262"
    );
}
/// Runs every `gen_instance_code_v0` vector from the bundled `data.json`.
///
/// Each input stream is stored as `stream:<hex>`. The ISCC is always
/// compared; `datahash` and `filesize` are compared only when the vector
/// provides them. The reported file size must also equal the decoded input
/// length.
#[test]
fn test_gen_instance_code_v0_conformance() {
    let json_str = include_str!("../tests/data.json");
    let data: serde_json::Value = serde_json::from_str(json_str).unwrap();
    let section = &data["gen_instance_code_v0"];
    let cases = section.as_object().unwrap();
    let mut tested = 0;
    for (name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let stream_str = inputs[0].as_str().unwrap();
        let bits = inputs[1].as_u64().unwrap() as u32;
        let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();
        let hex_data = stream_str
            .strip_prefix("stream:")
            .unwrap_or_else(|| panic!("expected 'stream:' prefix in test case {name}"));
        let input_bytes = hex::decode(hex_data)
            .unwrap_or_else(|e| panic!("invalid hex in test case {name}: {e}"));
        let result = gen_instance_code_v0(&input_bytes, bits)
            .unwrap_or_else(|e| panic!("gen_instance_code_v0 failed for {name}: {e}"));
        assert_eq!(
            result.iscc, expected_iscc,
            "ISCC mismatch in test case {name}"
        );
        // `datahash` and `filesize` are optional in the vectors.
        if let Some(expected_datahash) = tc["outputs"].get("datahash") {
            let expected_datahash = expected_datahash.as_str().unwrap();
            assert_eq!(
                result.datahash, expected_datahash,
                "datahash mismatch in test case {name}"
            );
        }
        if let Some(expected_filesize) = tc["outputs"].get("filesize") {
            let expected_filesize = expected_filesize.as_u64().unwrap();
            assert_eq!(
                result.filesize, expected_filesize,
                "filesize mismatch in test case {name}"
            );
        }
        assert_eq!(
            result.filesize,
            input_bytes.len() as u64,
            "filesize should match input length in test case {name}"
        );
        tested += 1;
    }
    // Without this guard an empty section would let the test pass without
    // checking anything.
    assert!(
        tested > 0,
        "expected at least one conformance test to run"
    );
}
/// Runs every `gen_iscc_code_v0` vector from the bundled `data.json`,
/// always passing `false` as the second argument.
#[test]
fn test_gen_iscc_code_v0_conformance() {
    let data: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = data["gen_iscc_code_v0"].as_object().unwrap();
    let mut tested = 0;
    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let raw_codes = inputs[0].as_array().unwrap();
        let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();

        let mut codes: Vec<&str> = Vec::new();
        for code in raw_codes {
            codes.push(code.as_str().unwrap());
        }

        let result = match gen_iscc_code_v0(&codes, false) {
            Ok(value) => value,
            Err(e) => panic!("gen_iscc_code_v0 failed for {tc_name}: {e}"),
        };
        assert_eq!(
            result.iscc, expected_iscc,
            "ISCC mismatch in test case {tc_name}"
        );
        tested += 1;
    }
    assert_eq!(tested, 5, "expected 5 conformance tests to run");
}
/// A single unit is not enough to build an ISCC-CODE.
#[test]
fn test_gen_iscc_code_v0_too_few_codes() {
    let outcome = gen_iscc_code_v0(&["AAAWKLHFPV6OPKDG"], false);
    let rejected = matches!(outcome, Err(IsccError::InvalidInput(_)));
    assert!(rejected);
}
/// Passing the same `AAAWKLHFPV6OPKDG` code twice must be rejected with
/// `InvalidInput`.
#[test]
fn test_gen_iscc_code_v0_missing_instance() {
    let outcome = gen_iscc_code_v0(&["AAAWKLHFPV6OPKDG", "AAAWKLHFPV6OPKDG"], false);
    let rejected = matches!(outcome, Err(IsccError::InvalidInput(_)));
    assert!(rejected);
}
/// A truncated unit string must be rejected with `InvalidInput`.
#[test]
fn test_gen_iscc_code_v0_short_code() {
    let outcome = gen_iscc_code_v0(&["AAAWKLHFPV6", "AAAWKLHFPV6OPKDG"], false);
    let rejected = matches!(outcome, Err(IsccError::InvalidInput(_)));
    assert!(rejected);
}
/// A Data-URL with an empty base64 payload is still treated as meta input:
/// the URL is echoed in `meta` and `metahash` equals
/// `utils::multi_hash_blake3` of empty bytes.
#[cfg(feature = "meta-code")]
#[test]
fn test_gen_meta_code_empty_data_url_enters_meta_branch() {
    let code =
        gen_meta_code_v0("Test", None, Some("data:application/json;base64,"), 64).unwrap();
    assert_eq!(code.name, "Test");

    let expected_meta = Some("data:application/json;base64,".to_string());
    assert_eq!(
        code.meta, expected_meta,
        "empty Data-URL payload should still enter meta branch"
    );

    let expected_metahash = utils::multi_hash_blake3(&[]);
    assert_eq!(
        code.metahash, expected_metahash,
        "metahash should be BLAKE3 of empty bytes"
    );
}
/// With an empty byte payload the digest must equal the name-only digest.
#[cfg(feature = "meta-code")]
#[test]
fn test_soft_hash_meta_v0_with_bytes_empty_equals_name_only() {
    let from_name = soft_hash_meta_v0("test", None);
    let from_empty_payload = soft_hash_meta_v0_with_bytes("test", &[]);
    assert_eq!(
        from_name, from_empty_payload,
        "empty bytes should produce same digest as name-only (no interleaving)"
    );
}
/// Pins the value of `META_TRIM_NAME`.
#[cfg(feature = "meta-code")]
#[test]
fn test_meta_trim_name_value() {
    let actual = META_TRIM_NAME;
    assert_eq!(actual, 128);
}
/// Pins the value of `META_TRIM_DESCRIPTION`.
#[cfg(feature = "meta-code")]
#[test]
fn test_meta_trim_description_value() {
    let actual = META_TRIM_DESCRIPTION;
    assert_eq!(actual, 4096);
}
/// Pins the value of `IO_READ_SIZE`.
#[test]
fn test_io_read_size_value() {
    let actual = IO_READ_SIZE;
    assert_eq!(actual, 4_194_304);
}
/// Pins the value of `TEXT_NGRAM_SIZE`.
#[test]
fn test_text_ngram_size_value() {
    let actual = TEXT_NGRAM_SIZE;
    assert_eq!(actual, 13);
}
/// The integer-based `encode_component` must produce the same string as the
/// enum-based `codec::encode_component` for a 64-bit Data component.
#[test]
fn test_encode_component_matches_codec() {
    let digest = [0xABu8; 8];
    let via_codec = codec::encode_component(
        codec::MainType::Data,
        codec::SubType::None,
        codec::Version::V0,
        64,
        &digest,
    );
    let via_integers = encode_component(3, 0, 0, 64, &digest).unwrap();
    let via_codec = via_codec.unwrap();
    assert_eq!(via_integers, via_codec);
}
/// Encoding a 32-byte digest at 64 bits succeeds and yields a non-empty
/// string.
#[test]
fn test_encode_component_round_trip() {
    let digest = [0x42u8; 32];
    let encoded = encode_component(0, 0, 0, 64, &digest).unwrap();
    let is_empty = encoded.is_empty();
    assert!(!is_empty);
}
/// Main type 5 is not accepted by `encode_component`.
#[test]
fn test_encode_component_rejects_iscc() {
    let digest = [0u8; 8];
    assert!(encode_component(5, 0, 0, 64, &digest).is_err());
}
/// A 4-byte digest is too short for a 64-bit component; the error message
/// must state both lengths.
#[test]
fn test_encode_component_rejects_short_digest() {
    let short_digest = [0u8; 4];
    let result = encode_component(0, 0, 0, 64, &short_digest);
    assert!(result.is_err());
    let err = result.unwrap_err().to_string();
    let mentions_lengths = err.contains("digest length 4 < bit_length/8 (8)");
    assert!(mentions_lengths, "unexpected error: {err}");
}
/// An out-of-range main type (99) is rejected.
#[test]
fn test_encode_component_rejects_invalid_mtype() {
    let digest = [0u8; 8];
    assert!(encode_component(99, 0, 0, 64, &digest).is_err());
}
/// An out-of-range subtype (99) is rejected.
#[test]
fn test_encode_component_rejects_invalid_stype() {
    let digest = [0u8; 8];
    assert!(encode_component(0, 99, 0, 64, &digest).is_err());
}
/// An out-of-range version (99) is rejected.
#[test]
fn test_encode_component_rejects_invalid_version() {
    let digest = [0u8; 8];
    assert!(encode_component(0, 0, 99, 64, &digest).is_err());
}
/// Encodes a 64-bit component with main type 0 and checks every field that
/// `iscc_decode` returns for it.
#[test]
fn test_iscc_decode_round_trip_meta() {
    let digest = [0xaa_u8; 8];
    let encoded = encode_component(0, 0, 0, 64, &digest).unwrap();
    let decoded = iscc_decode(&encoded).unwrap();
    assert_eq!(decoded.0, 0, "MainType::Meta");
    assert_eq!(decoded.1, 0, "SubType::None");
    assert_eq!(decoded.2, 0, "Version::V0");
    assert_eq!(decoded.3, 1, "length_index");
    assert_eq!(decoded.4, digest.to_vec());
}
/// Encodes a 64-bit component with main type 2 and checks the decoded
/// header fields (except the length index) and digest.
#[test]
fn test_iscc_decode_round_trip_content() {
    let digest = [0xbb_u8; 8];
    let encoded = encode_component(2, 0, 0, 64, &digest).unwrap();
    let decoded = iscc_decode(&encoded).unwrap();
    assert_eq!(decoded.0, 2, "MainType::Content");
    assert_eq!(decoded.1, 0, "SubType::TEXT");
    assert_eq!(decoded.2, 0, "Version::V0");
    assert_eq!(decoded.4, digest.to_vec());
}
/// Encodes a 64-bit component with main type 3 and checks the decoded main
/// type and digest.
#[test]
fn test_iscc_decode_round_trip_data() {
    let digest = [0xcc_u8; 8];
    let encoded = encode_component(3, 0, 0, 64, &digest).unwrap();
    let decoded = iscc_decode(&encoded).unwrap();
    assert_eq!(decoded.0, 3, "MainType::Data");
    assert_eq!(decoded.4, digest.to_vec());
}
/// Encodes a 64-bit component with main type 4 and checks the decoded main
/// type and digest.
#[test]
fn test_iscc_decode_round_trip_instance() {
    let digest = [0xdd_u8; 8];
    let encoded = encode_component(4, 0, 0, 64, &digest).unwrap();
    let decoded = iscc_decode(&encoded).unwrap();
    assert_eq!(decoded.0, 4, "MainType::Instance");
    assert_eq!(decoded.4, digest.to_vec());
}
/// `iscc_decode` accepts input carrying the `ISCC:` prefix.
#[test]
fn test_iscc_decode_with_prefix() {
    let digest = [0xaa_u8; 8];
    let encoded = encode_component(0, 0, 0, 64, &digest).unwrap();
    let decoded = iscc_decode(&format!("ISCC:{encoded}")).unwrap();
    assert_eq!(decoded.0, 0);
    assert_eq!(decoded.1, 0);
    assert_eq!(decoded.2, 0);
    assert_eq!(decoded.3, 1);
    assert_eq!(decoded.4, digest.to_vec());
}
/// `iscc_decode` accepts input with dashes inserted after the 4th and 8th
/// characters.
#[test]
fn test_iscc_decode_with_dashes() {
    let digest = [0xaa_u8; 8];
    let encoded = encode_component(0, 0, 0, 64, &digest).unwrap();
    let head = &encoded[..4];
    let middle = &encoded[4..8];
    let tail = &encoded[8..];
    let with_dashes = format!("{}-{}-{}", head, middle, tail);
    let decoded = iscc_decode(&with_dashes).unwrap();
    assert_eq!(decoded.0, 0);
    assert_eq!(decoded.1, 0);
    assert_eq!(decoded.2, 0);
    assert_eq!(decoded.3, 1);
    assert_eq!(decoded.4, digest.to_vec());
}
/// Input that is not valid base32 fails with an error mentioning "base32".
#[test]
fn test_iscc_decode_invalid_base32() {
    let outcome = iscc_decode("!!!INVALID!!!");
    assert!(outcome.is_err());
    let err = outcome.unwrap_err().to_string();
    let mentions_base32 = err.contains("base32");
    assert!(mentions_base32, "expected base32 error: {err}");
}
/// Decodes the fixed code `ISCC:AAAZXZ6OU74YAZIM` and checks its header
/// fields and digest length.
#[test]
fn test_iscc_decode_known_meta_code() {
    let decoded = iscc_decode("ISCC:AAAZXZ6OU74YAZIM").unwrap();
    assert_eq!(decoded.0, 0, "MainType::Meta");
    assert_eq!(decoded.1, 0, "SubType::None");
    assert_eq!(decoded.2, 0, "Version::V0");
    assert_eq!(decoded.3, 1, "length_index for 64-bit");
    assert_eq!(decoded.4.len(), 8, "64-bit = 8 bytes");
}
/// Decodes the fixed code `ISCC:IAA26E2JXH27TING` and checks its header
/// fields and digest length.
#[test]
fn test_iscc_decode_known_instance_code() {
    let decoded = iscc_decode("ISCC:IAA26E2JXH27TING").unwrap();
    assert_eq!(decoded.0, 4, "MainType::Instance");
    assert_eq!(decoded.1, 0, "SubType::None");
    assert_eq!(decoded.2, 0, "Version::V0");
    assert_eq!(decoded.3, 1, "length_index for 64-bit");
    assert_eq!(decoded.4.len(), 8, "64-bit = 8 bytes");
}
/// Decodes the fixed code `ISCC:GAAXL2XYM5BQIAZ3` and checks its main type,
/// subtype, version and digest length.
#[test]
fn test_iscc_decode_known_data_code() {
    let decoded = iscc_decode("ISCC:GAAXL2XYM5BQIAZ3").unwrap();
    assert_eq!(decoded.0, 3, "MainType::Data");
    assert_eq!(decoded.1, 0, "SubType::None");
    assert_eq!(decoded.2, 0, "Version::V0");
    assert_eq!(decoded.4.len(), 8, "64-bit = 8 bytes");
}
/// Compares the whole decoded tuple against the values used for encoding.
#[test]
fn test_iscc_decode_verification_round_trip() {
    let digest = [0xaa_u8; 8];
    let encoded = encode_component(0, 0, 0, 64, &digest).unwrap();
    let expected = (0, 0, 0, 1, vec![0xaa; 8]);
    assert_eq!(iscc_decode(&encoded).unwrap(), expected);
}
/// Only the first six characters of a 256-bit code are passed in; decoding
/// must fail.
#[test]
fn test_iscc_decode_truncated_input() {
    let digest = [0xff_u8; 32];
    let encoded = encode_component(0, 0, 0, 256, &digest).unwrap();
    let outcome = iscc_decode(&encoded[..6]);
    assert!(outcome.is_err(), "should fail on truncated input");
}
/// JSON without `@context` gets the `application/json` media type.
#[cfg(feature = "meta-code")]
#[test]
fn test_json_to_data_url_basic() {
    let url = json_to_data_url(r#"{"key": "value"}"#).unwrap();
    let has_prefix = url.starts_with("data:application/json;base64,");
    assert!(
        has_prefix,
        "expected application/json prefix, got: {url}"
    );
}
/// JSON with an `@context` key gets the `application/ld+json` media type.
#[cfg(feature = "meta-code")]
#[test]
fn test_json_to_data_url_ld_json() {
    let url = json_to_data_url(r#"{"@context": "https://schema.org"}"#).unwrap();
    let has_prefix = url.starts_with("data:application/ld+json;base64,");
    assert!(
        has_prefix,
        "expected application/ld+json prefix, got: {url}"
    );
}
/// The base64 payload must decode to JSON with keys in sorted order.
#[cfg(feature = "meta-code")]
#[test]
fn test_json_to_data_url_jcs_ordering() {
    let url = json_to_data_url(r#"{"b":1,"a":2}"#).unwrap();
    // Everything after the first comma is the base64 payload.
    let (_, b64) = url.split_once(',').unwrap();
    let payload = data_encoding::BASE64.decode(b64.as_bytes()).unwrap();
    let canonical = std::str::from_utf8(&payload).unwrap();
    assert_eq!(canonical, r#"{"a":2,"b":1}"#, "JCS should sort keys");
}
/// Decoding the generated Data-URL yields JSON equal in value to the input.
#[cfg(feature = "meta-code")]
#[test]
fn test_json_to_data_url_round_trip() {
    let input = r#"{"hello": "world", "num": 42}"#;
    let decoded_bytes = {
        let url = json_to_data_url(input).unwrap();
        decode_data_url(&url).unwrap()
    };
    let canonical: serde_json::Value =
        serde_json::from_slice(&decoded_bytes).expect("decoded bytes should be valid JSON");
    let original: serde_json::Value = serde_json::from_str(input).unwrap();
    assert_eq!(canonical, original, "round-trip preserves JSON semantics");
}
/// Non-JSON input fails with an error mentioning "invalid JSON".
#[cfg(feature = "meta-code")]
#[test]
fn test_json_to_data_url_invalid_json() {
    let outcome = json_to_data_url("not json");
    assert!(outcome.is_err(), "should reject invalid JSON");
    let err = outcome.unwrap_err().to_string();
    let mentions_invalid_json = err.contains("invalid JSON");
    assert!(
        mentions_invalid_json,
        "expected 'invalid JSON' in error: {err}"
    );
}
/// `{"some": "object"}` must be encoded as `application/json` with the
/// whitespace removed from the payload.
#[cfg(feature = "meta-code")]
#[test]
fn test_json_to_data_url_conformance_0016() {
    let url = json_to_data_url(r#"{"some": "object"}"#).unwrap();
    let has_prefix = url.starts_with("data:application/json;base64,");
    assert!(has_prefix, "expected application/json prefix");

    // Everything after the first comma is the base64 payload.
    let (_, b64) = url.split_once(',').unwrap();
    let payload = data_encoding::BASE64.decode(b64.as_bytes()).unwrap();
    let canonical = std::str::from_utf8(&payload).unwrap();
    assert_eq!(
        canonical, r#"{"some":"object"}"#,
        "JCS removes whitespace from JSON"
    );
}
/// Pins the value of `META_TRIM_META`.
#[cfg(feature = "meta-code")]
#[test]
fn test_meta_trim_meta_value() {
    let actual = META_TRIM_META;
    assert_eq!(actual, 128_000);
}
/// A JSON document of exactly 128 000 bytes is accepted.
///
/// The `{"x":""}` wrapper is 8 bytes, so the padding is 128 000 - 8 long.
#[cfg(feature = "meta-code")]
#[test]
fn test_gen_meta_code_v0_meta_at_limit() {
    let padding = "a".repeat(128_000 - 8);
    let json_str = format!(r#"{{"x":"{padding}"}}"#);
    let accepted = gen_meta_code_v0("test", None, Some(json_str.as_str()), 64).is_ok();
    assert!(
        accepted,
        "payload at exactly META_TRIM_META should succeed"
    );
}
/// A JSON document one byte over 128 000 bytes is rejected with a
/// "size limit" error.
#[cfg(feature = "meta-code")]
#[test]
fn test_gen_meta_code_v0_meta_over_limit() {
    let padding = "a".repeat(128_000 - 8 + 1);
    let json_str = format!(r#"{{"x":"{padding}"}}"#);
    let outcome = gen_meta_code_v0("test", None, Some(json_str.as_str()), 64);
    let hit_size_limit = match outcome {
        Err(IsccError::InvalidInput(ref msg)) => msg.contains("size limit"),
        _ => false,
    };
    assert!(
        hit_size_limit,
        "payload exceeding META_TRIM_META should return InvalidInput"
    );
}
/// A Data-URL whose base64 part is longer than
/// `META_TRIM_META * 4 / 3 + 256` is rejected with a "size limit" error.
#[cfg(feature = "meta-code")]
#[test]
fn test_gen_meta_code_v0_data_url_pre_decode_reject() {
    let pre_decode_limit = META_TRIM_META * 4 / 3 + 256;
    let padding = "A".repeat(pre_decode_limit + 1);
    let data_url = format!("data:application/octet-stream;base64,{padding}");
    let outcome = gen_meta_code_v0("test", None, Some(data_url.as_str()), 64);
    let hit_size_limit = match outcome {
        Err(IsccError::InvalidInput(ref msg)) => msg.contains("size limit"),
        _ => false,
    };
    assert!(
        hit_size_limit,
        "oversized Data-URL should be rejected before decoding"
    );
}
/// Writes `data` to a file in the system temp directory and returns its path.
///
/// The file name is `iscc_test_<pid>_<name>`. Including the process id keeps
/// test binaries that run at the same time (for example builds with
/// different feature sets) from overwriting or deleting each other's files.
///
/// # Panics
///
/// Panics if the file cannot be written.
fn write_temp_file(name: &str, data: &[u8]) -> std::path::PathBuf {
    let file_name = format!("iscc_test_{}_{name}", std::process::id());
    let path = std::env::temp_dir().join(file_name);
    std::fs::write(&path, data).expect("failed to write temp file");
    path
}
/// `gen_sum_code_v0` on a file must match the ISCC-CODE composed from
/// separately generated Data- and Instance-Codes over the same bytes.
#[test]
fn test_gen_sum_code_v0_equivalence() {
    let data = b"Hello, ISCC World! This is a test of gen_sum_code_v0.";
    let path = write_temp_file("sum_equiv", data);

    let from_file = gen_sum_code_v0(&path, 64, false, false).unwrap();
    let data_code = gen_data_code_v0(data, 64).unwrap();
    let instance_code = gen_instance_code_v0(data, 64).unwrap();
    let composed = gen_iscc_code_v0(&[&data_code.iscc, &instance_code.iscc], false).unwrap();

    assert_eq!(from_file.iscc, composed.iscc);
    assert_eq!(from_file.datahash, instance_code.datahash);
    assert_eq!(from_file.filesize, instance_code.filesize);
    assert_eq!(from_file.filesize, data.len() as u64);
    assert_eq!(from_file.units, None);

    // Best-effort cleanup; a failure to delete is ignored.
    std::fs::remove_file(&path).ok();
}
/// An empty file must give the same result as hashing empty bytes.
#[test]
fn test_gen_sum_code_v0_empty_file() {
    let path = write_temp_file("sum_empty", b"");

    let from_file = gen_sum_code_v0(&path, 64, false, false).unwrap();
    let data_code = gen_data_code_v0(b"", 64).unwrap();
    let instance_code = gen_instance_code_v0(b"", 64).unwrap();
    let composed = gen_iscc_code_v0(&[&data_code.iscc, &instance_code.iscc], false).unwrap();

    assert_eq!(from_file.iscc, composed.iscc);
    assert_eq!(from_file.datahash, instance_code.datahash);
    assert_eq!(from_file.filesize, 0);

    // Best-effort cleanup; a failure to delete is ignored.
    std::fs::remove_file(&path).ok();
}
/// A path that is never created fails with a "Cannot open file" error.
#[test]
fn test_gen_sum_code_v0_file_not_found() {
    let missing = std::env::temp_dir().join("iscc_test_nonexistent_file_xyz");
    let outcome = gen_sum_code_v0(&missing, 64, false, false);
    assert!(outcome.is_err());
    let err_msg = outcome.unwrap_err().to_string();
    let mentions_open_failure = err_msg.contains("Cannot open file");
    assert!(
        mentions_open_failure,
        "error message should mention file open failure: {err_msg}"
    );
}
/// At 64 bits the third argument (`true` vs `false`) does not change the
/// ISCC; at 128 bits it does, while data hash and file size stay the same.
#[test]
fn test_gen_sum_code_v0_wide_mode() {
    let data = b"Testing wide mode for gen_sum_code_v0 function.";
    let path = write_temp_file("sum_wide", data);

    let narrow_64 = gen_sum_code_v0(&path, 64, false, false).unwrap();
    let wide_64 = gen_sum_code_v0(&path, 64, true, false).unwrap();
    assert_eq!(narrow_64.iscc, wide_64.iscc);

    let narrow_128 = gen_sum_code_v0(&path, 128, false, false).unwrap();
    let wide_128 = gen_sum_code_v0(&path, 128, true, false).unwrap();
    assert_ne!(narrow_128.iscc, wide_128.iscc);
    assert_eq!(narrow_128.datahash, wide_128.datahash);
    assert_eq!(narrow_128.filesize, wide_128.filesize);

    // Best-effort cleanup; a failure to delete is ignored.
    std::fs::remove_file(&path).ok();
}
/// At 64 bits, the file-based ISCC equals the one composed from separate
/// Data- and Instance-Codes.
#[test]
fn test_gen_sum_code_v0_bits_64() {
    let data = b"Testing 64-bit gen_sum_code_v0.";
    let path = write_temp_file("sum_bits64", data);

    let from_file = gen_sum_code_v0(&path, 64, false, false).unwrap();
    let data_code = gen_data_code_v0(data, 64).unwrap();
    let instance_code = gen_instance_code_v0(data, 64).unwrap();
    let composed = gen_iscc_code_v0(&[&data_code.iscc, &instance_code.iscc], false).unwrap();

    assert_eq!(from_file.iscc, composed.iscc);

    // Best-effort cleanup; a failure to delete is ignored.
    std::fs::remove_file(&path).ok();
}
/// At 128 bits, the file-based result equals the one composed from separate
/// Data- and Instance-Codes, including data hash and file size.
#[test]
fn test_gen_sum_code_v0_bits_128() {
    let data = b"Testing 128-bit gen_sum_code_v0.";
    let path = write_temp_file("sum_bits128", data);

    let from_file = gen_sum_code_v0(&path, 128, false, false).unwrap();
    let data_code = gen_data_code_v0(data, 128).unwrap();
    let instance_code = gen_instance_code_v0(data, 128).unwrap();
    let composed = gen_iscc_code_v0(&[&data_code.iscc, &instance_code.iscc], false).unwrap();

    assert_eq!(from_file.iscc, composed.iscc);
    assert_eq!(from_file.datahash, instance_code.datahash);
    assert_eq!(from_file.filesize, data.len() as u64);

    // Best-effort cleanup; a failure to delete is ignored.
    std::fs::remove_file(&path).ok();
}
/// Same equivalence check on a 50 000-byte input whose bytes cycle through
/// 0..=255.
#[test]
fn test_gen_sum_code_v0_large_data() {
    let mut data: Vec<u8> = Vec::with_capacity(50_000);
    for i in 0..50_000 {
        data.push((i % 256) as u8);
    }
    let path = write_temp_file("sum_large", &data);

    let from_file = gen_sum_code_v0(&path, 64, false, false).unwrap();
    let data_code = gen_data_code_v0(&data, 64).unwrap();
    let instance_code = gen_instance_code_v0(&data, 64).unwrap();
    let composed = gen_iscc_code_v0(&[&data_code.iscc, &instance_code.iscc], false).unwrap();

    assert_eq!(from_file.iscc, composed.iscc);
    assert_eq!(from_file.datahash, instance_code.datahash);
    assert_eq!(from_file.filesize, data.len() as u64);

    // Best-effort cleanup; a failure to delete is ignored.
    std::fs::remove_file(&path).ok();
}
/// With the last argument set to `true`, `units` holds the Data-Code
/// followed by the Instance-Code, each equal to the separately generated
/// code.
#[test]
fn test_gen_sum_code_v0_units_enabled() {
    let data = b"Hello, ISCC World! This is a test of gen_sum_code_v0 units.";
    let path = write_temp_file("sum_units_on", data);
    let from_file = gen_sum_code_v0(&path, 64, false, true).unwrap();

    let units = from_file.units.as_ref().expect("units should be Some");
    assert_eq!(
        units.len(),
        2,
        "units should contain [Data-Code, Instance-Code]"
    );

    // Check the main type of each unit before comparing the full strings.
    let first_maintype = iscc_decode(&units[0]).unwrap().0;
    assert_eq!(
        first_maintype, 3,
        "first unit should be a Data-Code (MainType::Data = 3)"
    );
    let second_maintype = iscc_decode(&units[1]).unwrap().0;
    assert_eq!(
        second_maintype, 4,
        "second unit should be an Instance-Code (MainType::Instance = 4)"
    );

    let data_code = gen_data_code_v0(data, 64).unwrap();
    let instance_code = gen_instance_code_v0(data, 64).unwrap();
    assert_eq!(units[0], data_code.iscc);
    assert_eq!(units[1], instance_code.iscc);

    let composed = gen_iscc_code_v0(&[&data_code.iscc, &instance_code.iscc], false).unwrap();
    assert_eq!(from_file.iscc, composed.iscc);

    // Best-effort cleanup; a failure to delete is ignored.
    std::fs::remove_file(&path).ok();
}
/// With the last argument set to `false`, `units` is `None` and the ISCC
/// still matches the separately composed one.
#[test]
fn test_gen_sum_code_v0_units_disabled() {
    let data = b"Hello, ISCC World! This is a test of gen_sum_code_v0 no units.";
    let path = write_temp_file("sum_units_off", data);
    let from_file = gen_sum_code_v0(&path, 64, false, false).unwrap();
    assert_eq!(
        from_file.units, None,
        "units should be None when add_units is false"
    );

    let data_code = gen_data_code_v0(data, 64).unwrap();
    let instance_code = gen_instance_code_v0(data, 64).unwrap();
    let composed = gen_iscc_code_v0(&[&data_code.iscc, &instance_code.iscc], false).unwrap();
    assert_eq!(from_file.iscc, composed.iscc);

    // Best-effort cleanup; a failure to delete is ignored.
    std::fs::remove_file(&path).ok();
}
}