1pub mod cdc;
8pub mod codec;
9pub mod conformance;
10pub(crate) mod dct;
11pub mod minhash;
12pub mod simhash;
13pub mod streaming;
14pub mod types;
15pub mod utils;
16pub(crate) mod wtahash;
17
18pub use cdc::alg_cdc_chunks;
19pub use codec::encode_base64;
20pub use codec::iscc_decompose;
21pub use conformance::conformance_selftest;
22pub use minhash::alg_minhash_256;
23pub use simhash::{alg_simhash, sliding_window};
24pub use streaming::{DataHasher, InstanceHasher};
25pub use types::*;
26pub use utils::{text_clean, text_collapse, text_remove_newlines, text_trim};
27
/// Length limit handed to `utils::text_trim` for the normalized `name` in `gen_meta_code_v0`.
// NOTE(review): whether the limit counts bytes or characters is decided by `text_trim` — confirm.
pub const META_TRIM_NAME: usize = 128;

/// Length limit handed to `utils::text_trim` for the cleaned `description` in `gen_meta_code_v0`.
pub const META_TRIM_DESCRIPTION: usize = 4096;

/// Read size constant (4_194_304 = 4 * 1024 * 1024).
// NOTE(review): not referenced in this part of the file; presumably a buffer size in bytes for
// file/stream reads — confirm against its users.
pub const IO_READ_SIZE: usize = 4_194_304;

/// Window width handed to `simhash::sliding_window_strs` when hashing text in `soft_hash_text_v0`.
pub const TEXT_NGRAM_SIZE: usize = 13;
39
/// Error type returned by the fallible functions in this module.
#[derive(Debug, thiserror::Error)]
pub enum IsccError {
    /// The supplied input could not be processed. The payload describes the problem and is
    /// rendered as `invalid input: <message>`.
    #[error("invalid input: {0}")]
    InvalidInput(String),
}

/// Shorthand for a `Result` whose error type is [`IsccError`].
pub type IsccResult<T> = Result<T, IsccError>;
50
/// Interleaves the first 16 bytes of `a` and `b` in 4-byte groups.
///
/// The 32-byte result is laid out as `a[0..4] b[0..4] a[4..8] b[4..8] …`, i.e. four groups
/// from each input, alternating and starting with `a`.
///
/// # Panics
///
/// Panics if either input is shorter than 16 bytes.
fn interleave_digests(a: &[u8], b: &[u8]) -> Vec<u8> {
    let mut mixed = Vec::with_capacity(32);
    // Only the leading 16 bytes of each digest take part in the interleaving.
    for (left, right) in a[..16].chunks_exact(4).zip(b[..16].chunks_exact(4)) {
        mixed.extend_from_slice(left);
        mixed.extend_from_slice(right);
    }
    mixed
}
67
68fn meta_name_simhash(name: &str) -> Vec<u8> {
73 let collapsed_name = utils::text_collapse(name);
74 let name_ngrams = simhash::sliding_window_strs(&collapsed_name, 3);
75 let name_hashes: Vec<[u8; 32]> = name_ngrams
76 .iter()
77 .map(|ng| *blake3::hash(ng.as_bytes()).as_bytes())
78 .collect();
79 simhash::alg_simhash_inner(&name_hashes)
80}
81
82fn soft_hash_meta_v0(name: &str, extra: Option<&str>) -> Vec<u8> {
87 let name_simhash = meta_name_simhash(name);
88
89 match extra {
90 None | Some("") => name_simhash,
91 Some(extra_str) => {
92 let collapsed_extra = utils::text_collapse(extra_str);
93 let extra_ngrams = simhash::sliding_window_strs(&collapsed_extra, 3);
94 let extra_hashes: Vec<[u8; 32]> = extra_ngrams
95 .iter()
96 .map(|ng| *blake3::hash(ng.as_bytes()).as_bytes())
97 .collect();
98 let extra_simhash = simhash::alg_simhash_inner(&extra_hashes);
99
100 interleave_digests(&name_simhash, &extra_simhash)
101 }
102 }
103}
104
105fn soft_hash_meta_v0_with_bytes(name: &str, extra: &[u8]) -> Vec<u8> {
111 let name_simhash = meta_name_simhash(name);
112
113 if extra.is_empty() {
114 return name_simhash;
115 }
116
117 let byte_ngrams = simhash::sliding_window_bytes(extra, 4);
118 let byte_hashes: Vec<[u8; 32]> = byte_ngrams
119 .iter()
120 .map(|ng| *blake3::hash(ng).as_bytes())
121 .collect();
122 let byte_simhash = simhash::alg_simhash_inner(&byte_hashes);
123
124 interleave_digests(&name_simhash, &byte_simhash)
125}
126
127fn decode_data_url(data_url: &str) -> IsccResult<Vec<u8>> {
133 let payload_b64 = data_url
134 .split_once(',')
135 .map(|(_, b64)| b64)
136 .ok_or_else(|| IsccError::InvalidInput("Data-URL missing comma separator".into()))?;
137 data_encoding::BASE64
138 .decode(payload_b64.as_bytes())
139 .map_err(|e| IsccError::InvalidInput(format!("invalid base64 in Data-URL: {e}")))
140}
141
142fn parse_meta_json(meta_str: &str) -> IsccResult<Vec<u8>> {
144 let parsed: serde_json::Value = serde_json::from_str(meta_str)
145 .map_err(|e| IsccError::InvalidInput(format!("invalid JSON in meta: {e}")))?;
146 let mut buf = Vec::new();
147 serde_json_canonicalizer::to_writer(&parsed, &mut buf)
148 .map_err(|e| IsccError::InvalidInput(format!("JSON canonicalization failed: {e}")))?;
149 Ok(buf)
150}
151
152fn build_meta_data_url(json_bytes: &[u8], json_value: &serde_json::Value) -> String {
157 let media_type = if json_value.get("@context").is_some() {
158 "application/ld+json"
159 } else {
160 "application/json"
161 };
162 let b64 = data_encoding::BASE64.encode(json_bytes);
163 format!("data:{media_type};base64,{b64}")
164}
165
166pub fn encode_component(
176 mtype: u8,
177 stype: u8,
178 version: u8,
179 bit_length: u32,
180 digest: &[u8],
181) -> IsccResult<String> {
182 let mt = codec::MainType::try_from(mtype)?;
183 let st = codec::SubType::try_from(stype)?;
184 let vs = codec::Version::try_from(version)?;
185 let needed = (bit_length / 8) as usize;
186 if digest.len() < needed {
187 return Err(IsccError::InvalidInput(format!(
188 "digest length {} < bit_length/8 ({})",
189 digest.len(),
190 needed
191 )));
192 }
193 codec::encode_component(mt, st, vs, bit_length, digest)
194}
195
196pub fn iscc_decode(iscc: &str) -> IsccResult<(u8, u8, u8, u8, Vec<u8>)> {
211 let clean = iscc.strip_prefix("ISCC:").unwrap_or(iscc);
213 let clean = clean.replace('-', "");
215 let raw = codec::decode_base32(&clean)?;
216 let (mt, st, vs, length_index, tail) = codec::decode_header(&raw)?;
217 let bit_length = codec::decode_length(mt, length_index, st);
218 let nbytes = (bit_length / 8) as usize;
219 if tail.len() < nbytes {
220 return Err(IsccError::InvalidInput(format!(
221 "decoded body too short: expected {nbytes} digest bytes, got {}",
222 tail.len()
223 )));
224 }
225 Ok((
226 mt as u8,
227 st as u8,
228 vs as u8,
229 length_index as u8,
230 tail[..nbytes].to_vec(),
231 ))
232}
233
234pub fn json_to_data_url(json: &str) -> IsccResult<String> {
261 let parsed: serde_json::Value = serde_json::from_str(json)
262 .map_err(|e| IsccError::InvalidInput(format!("invalid JSON: {e}")))?;
263 let mut canonical_bytes = Vec::new();
264 serde_json_canonicalizer::to_writer(&parsed, &mut canonical_bytes)
265 .map_err(|e| IsccError::InvalidInput(format!("JSON canonicalization failed: {e}")))?;
266 Ok(build_meta_data_url(&canonical_bytes, &parsed))
267}
268
269pub fn gen_meta_code_v0(
277 name: &str,
278 description: Option<&str>,
279 meta: Option<&str>,
280 bits: u32,
281) -> IsccResult<MetaCodeResult> {
282 let name = utils::text_clean(name);
284 let name = utils::text_remove_newlines(&name);
285 let name = utils::text_trim(&name, META_TRIM_NAME);
286
287 if name.is_empty() {
288 return Err(IsccError::InvalidInput(
289 "name is empty after normalization".into(),
290 ));
291 }
292
293 let desc_str = description.unwrap_or("");
295 let desc_clean = utils::text_clean(desc_str);
296 let desc_clean = utils::text_trim(&desc_clean, META_TRIM_DESCRIPTION);
297
298 let meta_payload: Option<Vec<u8>> = match meta {
300 Some(meta_str) if meta_str.starts_with("data:") => Some(decode_data_url(meta_str)?),
301 Some(meta_str) => Some(parse_meta_json(meta_str)?),
302 None => None,
303 };
304
305 if let Some(ref payload) = meta_payload {
307 let meta_code_digest = soft_hash_meta_v0_with_bytes(&name, payload);
308 let metahash = utils::multi_hash_blake3(payload);
309
310 let meta_code = codec::encode_component(
311 codec::MainType::Meta,
312 codec::SubType::None,
313 codec::Version::V0,
314 bits,
315 &meta_code_digest,
316 )?;
317
318 let meta_value = match meta {
320 Some(meta_str) if meta_str.starts_with("data:") => meta_str.to_string(),
321 Some(meta_str) => {
322 let parsed: serde_json::Value = serde_json::from_str(meta_str)
323 .map_err(|e| IsccError::InvalidInput(format!("invalid JSON: {e}")))?;
324 build_meta_data_url(payload, &parsed)
325 }
326 None => unreachable!(),
327 };
328
329 Ok(MetaCodeResult {
330 iscc: format!("ISCC:{meta_code}"),
331 name: name.clone(),
332 description: if desc_clean.is_empty() {
333 None
334 } else {
335 Some(desc_clean)
336 },
337 meta: Some(meta_value),
338 metahash,
339 })
340 } else {
341 let payload = if desc_clean.is_empty() {
343 name.clone()
344 } else {
345 format!("{name} {desc_clean}")
346 };
347 let payload = payload.trim().to_string();
348 let metahash = utils::multi_hash_blake3(payload.as_bytes());
349
350 let extra = if desc_clean.is_empty() {
352 None
353 } else {
354 Some(desc_clean.as_str())
355 };
356 let meta_code_digest = soft_hash_meta_v0(&name, extra);
357
358 let meta_code = codec::encode_component(
359 codec::MainType::Meta,
360 codec::SubType::None,
361 codec::Version::V0,
362 bits,
363 &meta_code_digest,
364 )?;
365
366 Ok(MetaCodeResult {
367 iscc: format!("ISCC:{meta_code}"),
368 name: name.clone(),
369 description: if desc_clean.is_empty() {
370 None
371 } else {
372 Some(desc_clean)
373 },
374 meta: None,
375 metahash,
376 })
377 }
378}
379
380fn soft_hash_text_v0(text: &str) -> Vec<u8> {
385 let ngrams = simhash::sliding_window_strs(text, TEXT_NGRAM_SIZE);
386 let features: Vec<u32> = ngrams
387 .iter()
388 .map(|ng| xxhash_rust::xxh32::xxh32(ng.as_bytes(), 0))
389 .collect();
390 minhash::alg_minhash_256(&features)
391}
392
393pub fn gen_text_code_v0(text: &str, bits: u32) -> IsccResult<TextCodeResult> {
399 let collapsed = utils::text_collapse(text);
400 let characters = collapsed.chars().count();
401 let hash_digest = soft_hash_text_v0(&collapsed);
402 let component = codec::encode_component(
403 codec::MainType::Content,
404 codec::SubType::TEXT,
405 codec::Version::V0,
406 bits,
407 &hash_digest,
408 )?;
409 Ok(TextCodeResult {
410 iscc: format!("ISCC:{component}"),
411 characters,
412 })
413}
414
/// Returns the transpose of a row-major matrix.
///
/// The output has as many rows as the first input row has columns. An empty input yields an
/// empty output. Rows shorter than the first leave zeros in the missing cells.
///
/// # Panics
///
/// Panics if any row is longer than the first row.
fn transpose_matrix(matrix: &[Vec<f64>]) -> Vec<Vec<f64>> {
    let width = match matrix.first() {
        Some(first_row) => first_row.len(),
        None => return Vec::new(),
    };
    let height = matrix.len();

    let mut flipped = vec![vec![0.0f64; height]; width];
    matrix.iter().enumerate().for_each(|(r, line)| {
        line.iter()
            .enumerate()
            .for_each(|(c, &cell)| flipped[c][r] = cell);
    });
    flipped
}
430
/// Flattens an (up to) 8x8 window of `matrix` into a row-major vector.
///
/// The window starts at row index `row` and column index `col`. Rows or columns that fall
/// outside the matrix are simply skipped, so the result may hold fewer than 64 values.
fn flatten_8x8(matrix: &[Vec<f64>], col: usize, row: usize) -> Vec<f64> {
    matrix
        .iter()
        .skip(row)
        .take(8)
        .flat_map(|line| line.iter().skip(col).take(8).copied())
        .collect()
}
444
/// Returns the median of `values`.
///
/// For an odd count this is the middle element of the sorted values; for an even count it is
/// the mean of the two middle elements. The input slice is not modified.
///
/// # Panics
///
/// Panics if `values` is empty or contains a NaN (values must be totally comparable).
fn compute_median(values: &[f64]) -> f64 {
    let mut ordered = values.to_vec();
    ordered.sort_by(|x, y| x.partial_cmp(y).unwrap());

    let mid = ordered.len() / 2;
    match ordered.len() % 2 {
        1 => ordered[mid],
        _ => (ordered[mid - 1] + ordered[mid]) / 2.0,
    }
}
459
/// Packs booleans into bytes, most significant bit first.
///
/// Every group of eight bits becomes one byte; a trailing group with fewer than eight bits is
/// left-aligned and padded with zero bits on the right.
fn bits_to_bytes(bits: &[bool]) -> Vec<u8> {
    let mut packed = Vec::with_capacity((bits.len() + 7) / 8);
    for group in bits.chunks(8) {
        let byte = group
            .iter()
            .enumerate()
            .fold(0u8, |acc, (pos, &flag)| acc | (u8::from(flag) << (7 - pos)));
        packed.push(byte);
    }
    packed
}
474
475fn soft_hash_image_v0(pixels: &[u8], bits: u32) -> IsccResult<Vec<u8>> {
481 if pixels.len() != 1024 {
482 return Err(IsccError::InvalidInput(format!(
483 "expected 1024 pixels, got {}",
484 pixels.len()
485 )));
486 }
487 if bits > 256 {
488 return Err(IsccError::InvalidInput(format!(
489 "bits must be <= 256, got {bits}"
490 )));
491 }
492
493 let rows: Vec<Vec<f64>> = pixels
495 .chunks(32)
496 .map(|row| {
497 let row_f64: Vec<f64> = row.iter().map(|&p| p as f64).collect();
498 dct::alg_dct(&row_f64)
499 })
500 .collect::<IsccResult<Vec<Vec<f64>>>>()?;
501
502 let transposed = transpose_matrix(&rows);
504
505 let dct_cols: Vec<Vec<f64>> = transposed
507 .iter()
508 .map(|col| dct::alg_dct(col))
509 .collect::<IsccResult<Vec<Vec<f64>>>>()?;
510
511 let dct_matrix = transpose_matrix(&dct_cols);
513
514 let positions = [(0, 0), (1, 0), (0, 1), (1, 1)];
516 let mut bitstring = Vec::<bool>::with_capacity(256);
517
518 for (col, row) in positions {
519 let flat = flatten_8x8(&dct_matrix, col, row);
520 let median = compute_median(&flat);
521 for val in &flat {
522 bitstring.push(*val > median);
523 }
524 if bitstring.len() >= bits as usize {
525 break;
526 }
527 }
528
529 Ok(bits_to_bytes(&bitstring[..bits as usize]))
531}
532
533pub fn gen_image_code_v0(pixels: &[u8], bits: u32) -> IsccResult<ImageCodeResult> {
539 let hash_digest = soft_hash_image_v0(pixels, bits)?;
540 let component = codec::encode_component(
541 codec::MainType::Content,
542 codec::SubType::Image,
543 codec::Version::V0,
544 bits,
545 &hash_digest,
546 )?;
547 Ok(ImageCodeResult {
548 iscc: format!("ISCC:{component}"),
549 })
550}
551
/// Splits `slice` into `n` consecutive parts whose lengths differ by at most one.
///
/// The first `len % n` parts receive one extra element. When `n` exceeds the slice length the
/// trailing parts are empty. `n == 0` yields an empty vector.
fn array_split<T>(slice: &[T], n: usize) -> Vec<&[T]> {
    if n == 0 {
        return Vec::new();
    }
    let quotient = slice.len() / n;
    let extra = slice.len() % n;

    // Peel one part at a time off the front of the remaining slice.
    let mut rest = slice;
    (0..n)
        .map(|i| {
            let width = if i < extra { quotient + 1 } else { quotient };
            let (head, tail) = rest.split_at(width);
            rest = tail;
            head
        })
        .collect()
}
573
574fn soft_hash_audio_v0(cv: &[i32]) -> Vec<u8> {
581 let digests: Vec<[u8; 4]> = cv.iter().map(|&v| v.to_be_bytes()).collect();
583
584 if digests.is_empty() {
585 return vec![0u8; 32];
586 }
587
588 let mut parts: Vec<u8> = simhash::alg_simhash_inner(&digests);
590
591 let quarters = array_split(&digests, 4);
593 for quarter in &quarters {
594 if quarter.is_empty() {
595 parts.extend_from_slice(&[0u8; 4]);
596 } else {
597 parts.extend_from_slice(&simhash::alg_simhash_inner(quarter));
598 }
599 }
600
601 let mut sorted_values: Vec<i32> = cv.to_vec();
603 sorted_values.sort();
604 let sorted_digests: Vec<[u8; 4]> = sorted_values.iter().map(|&v| v.to_be_bytes()).collect();
605 let thirds = array_split(&sorted_digests, 3);
606 for third in &thirds {
607 if third.is_empty() {
608 parts.extend_from_slice(&[0u8; 4]);
609 } else {
610 parts.extend_from_slice(&simhash::alg_simhash_inner(third));
611 }
612 }
613
614 parts
615}
616
617pub fn gen_audio_code_v0(cv: &[i32], bits: u32) -> IsccResult<AudioCodeResult> {
622 let hash_digest = soft_hash_audio_v0(cv);
623 let component = codec::encode_component(
624 codec::MainType::Content,
625 codec::SubType::Audio,
626 codec::Version::V0,
627 bits,
628 &hash_digest,
629 )?;
630 Ok(AudioCodeResult {
631 iscc: format!("ISCC:{component}"),
632 })
633}
634
635pub fn soft_hash_video_v0<S: AsRef<[i32]> + Ord>(
640 frame_sigs: &[S],
641 bits: u32,
642) -> IsccResult<Vec<u8>> {
643 if frame_sigs.is_empty() {
644 return Err(IsccError::InvalidInput(
645 "frame_sigs must not be empty".into(),
646 ));
647 }
648
649 let unique: std::collections::BTreeSet<&S> = frame_sigs.iter().collect();
651
652 let cols = frame_sigs[0].as_ref().len();
654 let mut vecsum = vec![0i64; cols];
655 for sig in &unique {
656 for (c, &val) in sig.as_ref().iter().enumerate() {
657 vecsum[c] += val as i64;
658 }
659 }
660
661 wtahash::alg_wtahash(&vecsum, bits)
662}
663
664pub fn gen_video_code_v0<S: AsRef<[i32]> + Ord>(
669 frame_sigs: &[S],
670 bits: u32,
671) -> IsccResult<VideoCodeResult> {
672 let digest = soft_hash_video_v0(frame_sigs, bits)?;
673 let component = codec::encode_component(
674 codec::MainType::Content,
675 codec::SubType::Video,
676 codec::Version::V0,
677 bits,
678 &digest,
679 )?;
680 Ok(VideoCodeResult {
681 iscc: format!("ISCC:{component}"),
682 })
683}
684
685fn soft_hash_codes_v0(cc_digests: &[Vec<u8>], bits: u32) -> IsccResult<Vec<u8>> {
692 if cc_digests.len() < 2 {
693 return Err(IsccError::InvalidInput(
694 "at least 2 Content-Codes required for mixing".into(),
695 ));
696 }
697
698 let nbytes = (bits / 8) as usize;
699 let mut prepared: Vec<Vec<u8>> = Vec::with_capacity(cc_digests.len());
700
701 for raw in cc_digests {
702 let (mtype, stype, _ver, blen, body) = codec::decode_header(raw)?;
703 if mtype != codec::MainType::Content {
704 return Err(IsccError::InvalidInput(
705 "all codes must be Content-Codes".into(),
706 ));
707 }
708 let unit_bits = codec::decode_length(mtype, blen, stype);
709 if unit_bits < bits {
710 return Err(IsccError::InvalidInput(format!(
711 "Content-Code too short for {bits}-bit length (has {unit_bits} bits)"
712 )));
713 }
714 let mut entry = Vec::with_capacity(nbytes);
715 entry.push(raw[0]); let take = std::cmp::min(nbytes - 1, body.len());
717 entry.extend_from_slice(&body[..take]);
718 while entry.len() < nbytes {
720 entry.push(0);
721 }
722 prepared.push(entry);
723 }
724
725 Ok(simhash::alg_simhash_inner(&prepared))
726}
727
728pub fn gen_mixed_code_v0(codes: &[&str], bits: u32) -> IsccResult<MixedCodeResult> {
734 let decoded: Vec<Vec<u8>> = codes
735 .iter()
736 .map(|code| {
737 let clean = code.strip_prefix("ISCC:").unwrap_or(code);
738 codec::decode_base32(clean)
739 })
740 .collect::<IsccResult<Vec<Vec<u8>>>>()?;
741
742 let digest = soft_hash_codes_v0(&decoded, bits)?;
743
744 let component = codec::encode_component(
745 codec::MainType::Content,
746 codec::SubType::Mixed,
747 codec::Version::V0,
748 bits,
749 &digest,
750 )?;
751
752 Ok(MixedCodeResult {
753 iscc: format!("ISCC:{component}"),
754 parts: codes.iter().map(|s| s.to_string()).collect(),
755 })
756}
757
758pub fn gen_data_code_v0(data: &[u8], bits: u32) -> IsccResult<DataCodeResult> {
764 let chunks = cdc::alg_cdc_chunks(data, false, cdc::DATA_AVG_CHUNK_SIZE);
765 let mut features: Vec<u32> = chunks
766 .iter()
767 .map(|chunk| xxhash_rust::xxh32::xxh32(chunk, 0))
768 .collect();
769
770 if features.is_empty() {
772 features.push(xxhash_rust::xxh32::xxh32(b"", 0));
773 }
774
775 let digest = minhash::alg_minhash_256(&features);
776 let component = codec::encode_component(
777 codec::MainType::Data,
778 codec::SubType::None,
779 codec::Version::V0,
780 bits,
781 &digest,
782 )?;
783
784 Ok(DataCodeResult {
785 iscc: format!("ISCC:{component}"),
786 })
787}
788
789pub fn gen_instance_code_v0(data: &[u8], bits: u32) -> IsccResult<InstanceCodeResult> {
794 let digest = blake3::hash(data);
795 let datahash = utils::multi_hash_blake3(data);
796 let filesize = data.len() as u64;
797 let component = codec::encode_component(
798 codec::MainType::Instance,
799 codec::SubType::None,
800 codec::Version::V0,
801 bits,
802 digest.as_bytes(),
803 )?;
804 Ok(InstanceCodeResult {
805 iscc: format!("ISCC:{component}"),
806 datahash,
807 filesize,
808 })
809}
810
/// Combines several ISCC unit codes into one composite ISCC-CODE.
///
/// `codes` are unit codes with or without the `ISCC:` prefix. At least two are required and,
/// after sorting by main type, the last two must be a Data-Code and an Instance-Code. Any
/// units before them are treated as optional units.
///
/// `wide` requests the wide form; it only takes effect when exactly a Data-Code and an
/// Instance-Code are given and both decode to at least 128 bits. In wide form 16 bytes are
/// taken from each unit, otherwise 8.
///
/// # Errors
///
/// Returns `IsccError::InvalidInput` when fewer than two codes are given, a code is shorter
/// than 16 characters, the Data/Instance pair is missing, or Semantic/Content units disagree
/// on their subtype. Decoding and header-encoding errors from `codec` are propagated.
pub fn gen_iscc_code_v0(codes: &[&str], wide: bool) -> IsccResult<IsccCodeResult> {
    // Drop the optional "ISCC:" prefix from every input.
    let cleaned: Vec<&str> = codes
        .iter()
        .map(|c| c.strip_prefix("ISCC:").unwrap_or(c))
        .collect();

    // A composite needs at least two units.
    if cleaned.len() < 2 {
        return Err(IsccError::InvalidInput(
            "at least 2 ISCC unit codes required".into(),
        ));
    }

    // Reject units whose text form is shorter than 16 characters.
    for code in &cleaned {
        if code.len() < 16 {
            return Err(IsccError::InvalidInput(format!(
                "ISCC unit code too short (min 16 chars): {code}"
            )));
        }
    }

    // Decode every unit into (main type, subtype, version, length field, body bytes).
    let mut decoded: Vec<(
        codec::MainType,
        codec::SubType,
        codec::Version,
        u32,
        Vec<u8>,
    )> = Vec::with_capacity(cleaned.len());
    for code in &cleaned {
        let raw = codec::decode_base32(code)?;
        let header = codec::decode_header(&raw)?;
        decoded.push(header);
    }

    // Order the units by main type so the positional checks below are meaningful.
    decoded.sort_by_key(|&(mt, ..)| mt);

    let main_types: Vec<codec::MainType> = decoded.iter().map(|&(mt, ..)| mt).collect();

    // After sorting, the last two units must be Data followed by Instance.
    let n = main_types.len();
    if main_types[n - 2] != codec::MainType::Data || main_types[n - 1] != codec::MainType::Instance
    {
        return Err(IsccError::InvalidInput(
            "Data-Code and Instance-Code are mandatory".into(),
        ));
    }

    // Wide form: requested, exactly Data + Instance, and both units are at least 128 bits.
    let is_wide = wide
        && decoded.len() == 2
        && main_types == [codec::MainType::Data, codec::MainType::Instance]
        && decoded
            .iter()
            .all(|&(mt, st, _, len, _)| codec::decode_length(mt, len, st) >= 128);

    // Pick the composite's subtype.
    let st = if is_wide {
        codec::SubType::Wide
    } else {
        // Subtypes of all Semantic and Content units; they have to agree.
        let sc_subtypes: Vec<codec::SubType> = decoded
            .iter()
            .filter(|&&(mt, ..)| mt == codec::MainType::Semantic || mt == codec::MainType::Content)
            .map(|&(_, st, ..)| st)
            .collect();

        if !sc_subtypes.is_empty() {
            let first = sc_subtypes[0];
            if sc_subtypes.iter().all(|&s| s == first) {
                first
            } else {
                return Err(IsccError::InvalidInput(
                    "mixed SubTypes among Content/Semantic units".into(),
                ));
            }
        } else if decoded.len() == 2 {
            // Only Data + Instance present.
            codec::SubType::Sum
        } else {
            // No Semantic/Content unit, but more than two units in total.
            codec::SubType::IsccNone
        }
    };

    // Everything before the trailing Data/Instance pair is an optional unit; the set of
    // optional main types is encoded into the header's length field.
    let optional_types = &main_types[..n - 2];
    let encoded_length = codec::encode_units(optional_types)?;

    // Concatenate the leading bytes of every unit body (fewer if a body is shorter).
    let bytes_per_unit = if is_wide { 16 } else { 8 };
    let mut digest = Vec::with_capacity(decoded.len() * bytes_per_unit);
    for (_, _, _, _, tail) in &decoded {
        let take = bytes_per_unit.min(tail.len());
        digest.extend_from_slice(&tail[..take]);
    }

    // Header followed by the combined digest, base32-encoded.
    let header = codec::encode_header(
        codec::MainType::Iscc,
        st,
        codec::Version::V0,
        encoded_length,
    )?;
    let mut code_bytes = header;
    code_bytes.extend_from_slice(&digest);
    let code = codec::encode_base32(&code_bytes);

    Ok(IsccCodeResult {
        iscc: format!("ISCC:{code}"),
    })
}
935
936#[cfg(test)]
937mod tests {
938 use super::*;
939
940 #[test]
941 fn test_gen_meta_code_v0_title_only() {
942 let result = gen_meta_code_v0("Die Unendliche Geschichte", None, None, 64).unwrap();
943 assert_eq!(result.iscc, "ISCC:AAAZXZ6OU74YAZIM");
944 assert_eq!(result.name, "Die Unendliche Geschichte");
945 assert_eq!(result.description, None);
946 assert_eq!(result.meta, None);
947 }
948
949 #[test]
950 fn test_gen_meta_code_v0_title_description() {
951 let result = gen_meta_code_v0(
952 "Die Unendliche Geschichte",
953 Some("Von Michael Ende"),
954 None,
955 64,
956 )
957 .unwrap();
958 assert_eq!(result.iscc, "ISCC:AAAZXZ6OU4E45RB5");
959 assert_eq!(result.name, "Die Unendliche Geschichte");
960 assert_eq!(result.description, Some("Von Michael Ende".to_string()));
961 assert_eq!(result.meta, None);
962 }
963
964 #[test]
965 fn test_gen_meta_code_v0_json_meta() {
966 let result = gen_meta_code_v0("Hello", None, Some(r#"{"some":"object"}"#), 64).unwrap();
967 assert_eq!(result.iscc, "ISCC:AAAWKLHFXN63LHL2");
968 assert!(result.meta.is_some());
969 assert!(
970 result
971 .meta
972 .unwrap()
973 .starts_with("data:application/json;base64,")
974 );
975 }
976
977 #[test]
978 fn test_gen_meta_code_v0_data_url_meta() {
979 let result = gen_meta_code_v0(
980 "Hello",
981 None,
982 Some("data:application/json;charset=utf-8;base64,eyJzb21lIjogIm9iamVjdCJ9"),
983 64,
984 )
985 .unwrap();
986 assert_eq!(result.iscc, "ISCC:AAAWKLHFXN43ICP2");
987 assert_eq!(
989 result.meta,
990 Some("data:application/json;charset=utf-8;base64,eyJzb21lIjogIm9iamVjdCJ9".to_string())
991 );
992 }
993
994 #[test]
1000 fn test_gen_meta_code_v0_jcs_float_canonicalization() {
1001 let result = gen_meta_code_v0("Test", None, Some(r#"{"value":1.0}"#), 64).unwrap();
1004
1005 assert_eq!(
1007 result.iscc, "ISCC:AAAX4GX3RZH2I6QZ",
1008 "ISCC mismatch: parse_meta_json must use RFC 8785 (JCS) canonicalization"
1009 );
1010 assert_eq!(
1011 result.meta,
1012 Some("data:application/json;base64,eyJ2YWx1ZSI6MX0=".to_string()),
1013 "meta Data-URL mismatch: JCS should serialize 1.0 as 1"
1014 );
1015 assert_eq!(
1016 result.metahash, "1e2010b291d392b6999ffe4aa4661fb343fc371fca3bfb5bb4e8d8226fdf85743232",
1017 "metahash mismatch: canonical bytes differ between JCS and serde_json"
1018 );
1019 }
1020
1021 #[test]
1026 fn test_gen_meta_code_v0_jcs_large_float_canonicalization() {
1027 let result = gen_meta_code_v0("Test", None, Some(r#"{"value":1e20}"#), 64).unwrap();
1028
1029 assert_eq!(
1030 result.iscc, "ISCC:AAAX4GX3R32YH5P7",
1031 "ISCC mismatch: JCS should expand 1e20 to 100000000000000000000"
1032 );
1033 assert_eq!(
1034 result.meta,
1035 Some(
1036 "data:application/json;base64,eyJ2YWx1ZSI6MTAwMDAwMDAwMDAwMDAwMDAwMDAwfQ=="
1037 .to_string()
1038 ),
1039 "meta Data-URL mismatch: JCS should expand large float to integer form"
1040 );
1041 assert_eq!(
1042 result.metahash, "1e201ff83c1822c348717658a0b4713739646da7c59832691b337a457416ddd1c73d",
1043 "metahash mismatch: canonical bytes differ for large float"
1044 );
1045 }
1046
1047 #[test]
1048 fn test_gen_meta_code_v0_invalid_json() {
1049 assert!(matches!(
1050 gen_meta_code_v0("test", None, Some("not json"), 64),
1051 Err(IsccError::InvalidInput(_))
1052 ));
1053 }
1054
1055 #[test]
1056 fn test_gen_meta_code_v0_invalid_data_url() {
1057 assert!(matches!(
1058 gen_meta_code_v0("test", None, Some("data:no-comma-here"), 64),
1059 Err(IsccError::InvalidInput(_))
1060 ));
1061 }
1062
1063 #[test]
1064 fn test_gen_meta_code_v0_conformance() {
1065 let json_str = include_str!("../tests/data.json");
1066 let data: serde_json::Value = serde_json::from_str(json_str).unwrap();
1067 let section = &data["gen_meta_code_v0"];
1068 let cases = section.as_object().unwrap();
1069
1070 let mut tested = 0;
1071
1072 for (tc_name, tc) in cases {
1073 let inputs = tc["inputs"].as_array().unwrap();
1074 let input_name = inputs[0].as_str().unwrap();
1075 let input_desc = inputs[1].as_str().unwrap();
1076 let meta_val = &inputs[2];
1077 let bits = inputs[3].as_u64().unwrap() as u32;
1078
1079 let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();
1080 let expected_metahash = tc["outputs"]["metahash"].as_str().unwrap();
1081
1082 let meta_arg: Option<String> = match meta_val {
1084 serde_json::Value::Null => None,
1085 serde_json::Value::String(s) => Some(s.clone()),
1086 serde_json::Value::Object(_) => Some(serde_json::to_string(meta_val).unwrap()),
1087 other => panic!("unexpected meta type in {tc_name}: {other:?}"),
1088 };
1089
1090 let desc = if input_desc.is_empty() {
1091 None
1092 } else {
1093 Some(input_desc)
1094 };
1095
1096 let result = gen_meta_code_v0(input_name, desc, meta_arg.as_deref(), bits)
1098 .unwrap_or_else(|e| panic!("gen_meta_code_v0 failed for {tc_name}: {e}"));
1099 assert_eq!(
1100 result.iscc, expected_iscc,
1101 "ISCC mismatch in test case {tc_name}"
1102 );
1103
1104 assert_eq!(
1106 result.metahash, expected_metahash,
1107 "metahash mismatch in test case {tc_name}"
1108 );
1109
1110 if let Some(expected_name) = tc["outputs"].get("name") {
1112 let expected_name = expected_name.as_str().unwrap();
1113 assert_eq!(
1114 result.name, expected_name,
1115 "name mismatch in test case {tc_name}"
1116 );
1117 }
1118
1119 if let Some(expected_desc) = tc["outputs"].get("description") {
1121 let expected_desc = expected_desc.as_str().unwrap();
1122 assert_eq!(
1123 result.description.as_deref(),
1124 Some(expected_desc),
1125 "description mismatch in test case {tc_name}"
1126 );
1127 }
1128
1129 if meta_arg.is_some() {
1131 assert!(
1132 result.meta.is_some(),
1133 "meta should be present in test case {tc_name}"
1134 );
1135 } else {
1136 assert!(
1137 result.meta.is_none(),
1138 "meta should be absent in test case {tc_name}"
1139 );
1140 }
1141
1142 tested += 1;
1143 }
1144
1145 assert_eq!(tested, 16, "expected 16 conformance tests to run");
1146 }
1147
1148 #[test]
1149 fn test_gen_text_code_v0_empty() {
1150 let result = gen_text_code_v0("", 64).unwrap();
1151 assert_eq!(result.iscc, "ISCC:EAASL4F2WZY7KBXB");
1152 assert_eq!(result.characters, 0);
1153 }
1154
1155 #[test]
1156 fn test_gen_text_code_v0_hello_world() {
1157 let result = gen_text_code_v0("Hello World", 64).unwrap();
1158 assert_eq!(result.iscc, "ISCC:EAASKDNZNYGUUF5A");
1159 assert_eq!(result.characters, 10); }
1161
1162 #[test]
1163 fn test_gen_text_code_v0_conformance() {
1164 let json_str = include_str!("../tests/data.json");
1165 let data: serde_json::Value = serde_json::from_str(json_str).unwrap();
1166 let section = &data["gen_text_code_v0"];
1167 let cases = section.as_object().unwrap();
1168
1169 let mut tested = 0;
1170
1171 for (tc_name, tc) in cases {
1172 let inputs = tc["inputs"].as_array().unwrap();
1173 let input_text = inputs[0].as_str().unwrap();
1174 let bits = inputs[1].as_u64().unwrap() as u32;
1175
1176 let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();
1177 let expected_chars = tc["outputs"]["characters"].as_u64().unwrap() as usize;
1178
1179 let result = gen_text_code_v0(input_text, bits)
1181 .unwrap_or_else(|e| panic!("gen_text_code_v0 failed for {tc_name}: {e}"));
1182 assert_eq!(
1183 result.iscc, expected_iscc,
1184 "ISCC mismatch in test case {tc_name}"
1185 );
1186
1187 assert_eq!(
1189 result.characters, expected_chars,
1190 "character count mismatch in test case {tc_name}"
1191 );
1192
1193 tested += 1;
1194 }
1195
1196 assert_eq!(tested, 5, "expected 5 conformance tests to run");
1197 }
1198
1199 #[test]
1200 fn test_gen_image_code_v0_all_black() {
1201 let pixels = vec![0u8; 1024];
1202 let result = gen_image_code_v0(&pixels, 64).unwrap();
1203 assert_eq!(result.iscc, "ISCC:EEAQAAAAAAAAAAAA");
1204 }
1205
1206 #[test]
1207 fn test_gen_image_code_v0_all_white() {
1208 let pixels = vec![255u8; 1024];
1209 let result = gen_image_code_v0(&pixels, 128).unwrap();
1210 assert_eq!(result.iscc, "ISCC:EEBYAAAAAAAAAAAAAAAAAAAAAAAAA");
1211 }
1212
1213 #[test]
1214 fn test_gen_image_code_v0_invalid_pixel_count() {
1215 assert!(gen_image_code_v0(&[0u8; 100], 64).is_err());
1216 }
1217
1218 #[test]
1219 fn test_gen_image_code_v0_conformance() {
1220 let json_str = include_str!("../tests/data.json");
1221 let data: serde_json::Value = serde_json::from_str(json_str).unwrap();
1222 let section = &data["gen_image_code_v0"];
1223 let cases = section.as_object().unwrap();
1224
1225 let mut tested = 0;
1226
1227 for (tc_name, tc) in cases {
1228 let inputs = tc["inputs"].as_array().unwrap();
1229 let pixels_json = inputs[0].as_array().unwrap();
1230 let bits = inputs[1].as_u64().unwrap() as u32;
1231 let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();
1232
1233 let pixels: Vec<u8> = pixels_json
1234 .iter()
1235 .map(|v| v.as_u64().unwrap() as u8)
1236 .collect();
1237
1238 let result = gen_image_code_v0(&pixels, bits)
1239 .unwrap_or_else(|e| panic!("gen_image_code_v0 failed for {tc_name}: {e}"));
1240 assert_eq!(
1241 result.iscc, expected_iscc,
1242 "ISCC mismatch in test case {tc_name}"
1243 );
1244
1245 tested += 1;
1246 }
1247
1248 assert_eq!(tested, 3, "expected 3 conformance tests to run");
1249 }
1250
1251 #[test]
1252 fn test_gen_audio_code_v0_empty() {
1253 let result = gen_audio_code_v0(&[], 64).unwrap();
1254 assert_eq!(result.iscc, "ISCC:EIAQAAAAAAAAAAAA");
1255 }
1256
1257 #[test]
1258 fn test_gen_audio_code_v0_single() {
1259 let result = gen_audio_code_v0(&[1], 128).unwrap();
1260 assert_eq!(result.iscc, "ISCC:EIBQAAAAAEAAAAABAAAAAAAAAAAAA");
1261 }
1262
1263 #[test]
1264 fn test_gen_audio_code_v0_negative() {
1265 let result = gen_audio_code_v0(&[-1, 0, 1], 256).unwrap();
1266 assert_eq!(
1267 result.iscc,
1268 "ISCC:EIDQAAAAAH777777AAAAAAAAAAAACAAAAAAP777774AAAAAAAAAAAAI"
1269 );
1270 }
1271
1272 #[test]
1273 fn test_gen_audio_code_v0_conformance() {
1274 let json_str = include_str!("../tests/data.json");
1275 let data: serde_json::Value = serde_json::from_str(json_str).unwrap();
1276 let section = &data["gen_audio_code_v0"];
1277 let cases = section.as_object().unwrap();
1278
1279 let mut tested = 0;
1280
1281 for (tc_name, tc) in cases {
1282 let inputs = tc["inputs"].as_array().unwrap();
1283 let cv_json = inputs[0].as_array().unwrap();
1284 let bits = inputs[1].as_u64().unwrap() as u32;
1285 let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();
1286
1287 let cv: Vec<i32> = cv_json.iter().map(|v| v.as_i64().unwrap() as i32).collect();
1288
1289 let result = gen_audio_code_v0(&cv, bits)
1290 .unwrap_or_else(|e| panic!("gen_audio_code_v0 failed for {tc_name}: {e}"));
1291 assert_eq!(
1292 result.iscc, expected_iscc,
1293 "ISCC mismatch in test case {tc_name}"
1294 );
1295
1296 tested += 1;
1297 }
1298
1299 assert_eq!(tested, 5, "expected 5 conformance tests to run");
1300 }
1301
1302 #[test]
1303 fn test_array_split_even() {
1304 let data = vec![1, 2, 3, 4];
1305 let parts = array_split(&data, 4);
1306 assert_eq!(parts, vec![&[1][..], &[2][..], &[3][..], &[4][..]]);
1307 }
1308
1309 #[test]
1310 fn test_array_split_remainder() {
1311 let data = vec![1, 2, 3, 4, 5];
1312 let parts = array_split(&data, 3);
1313 assert_eq!(parts, vec![&[1, 2][..], &[3, 4][..], &[5][..]]);
1314 }
1315
1316 #[test]
1317 fn test_array_split_more_parts_than_elements() {
1318 let data = vec![1, 2];
1319 let parts = array_split(&data, 4);
1320 assert_eq!(
1321 parts,
1322 vec![&[1][..], &[2][..], &[][..] as &[i32], &[][..] as &[i32]]
1323 );
1324 }
1325
1326 #[test]
1327 fn test_array_split_empty() {
1328 let data: Vec<i32> = vec![];
1329 let parts = array_split(&data, 3);
1330 assert_eq!(
1331 parts,
1332 vec![&[][..] as &[i32], &[][..] as &[i32], &[][..] as &[i32]]
1333 );
1334 }
1335
1336 #[test]
1337 fn test_gen_video_code_v0_empty_frames() {
1338 let frames: Vec<Vec<i32>> = vec![];
1339 assert!(matches!(
1340 gen_video_code_v0(&frames, 64),
1341 Err(IsccError::InvalidInput(_))
1342 ));
1343 }
1344
1345 #[test]
1346 fn test_gen_video_code_v0_conformance() {
1347 let json_str = include_str!("../tests/data.json");
1348 let data: serde_json::Value = serde_json::from_str(json_str).unwrap();
1349 let section = &data["gen_video_code_v0"];
1350 let cases = section.as_object().unwrap();
1351
1352 let mut tested = 0;
1353
1354 for (tc_name, tc) in cases {
1355 let inputs = tc["inputs"].as_array().unwrap();
1356 let frames_json = inputs[0].as_array().unwrap();
1357 let bits = inputs[1].as_u64().unwrap() as u32;
1358 let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();
1359
1360 let frame_sigs: Vec<Vec<i32>> = frames_json
1361 .iter()
1362 .map(|frame| {
1363 frame
1364 .as_array()
1365 .unwrap()
1366 .iter()
1367 .map(|v| v.as_i64().unwrap() as i32)
1368 .collect()
1369 })
1370 .collect();
1371
1372 let result = gen_video_code_v0(&frame_sigs, bits)
1373 .unwrap_or_else(|e| panic!("gen_video_code_v0 failed for {tc_name}: {e}"));
1374 assert_eq!(
1375 result.iscc, expected_iscc,
1376 "ISCC mismatch in test case {tc_name}"
1377 );
1378
1379 tested += 1;
1380 }
1381
1382 assert_eq!(tested, 3, "expected 3 conformance tests to run");
1383 }
1384
1385 #[test]
1386 fn test_gen_mixed_code_v0_conformance() {
1387 let json_str = include_str!("../tests/data.json");
1388 let data: serde_json::Value = serde_json::from_str(json_str).unwrap();
1389 let section = &data["gen_mixed_code_v0"];
1390 let cases = section.as_object().unwrap();
1391
1392 let mut tested = 0;
1393
1394 for (tc_name, tc) in cases {
1395 let inputs = tc["inputs"].as_array().unwrap();
1396 let codes_json = inputs[0].as_array().unwrap();
1397 let bits = inputs[1].as_u64().unwrap() as u32;
1398 let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();
1399 let expected_parts: Vec<&str> = tc["outputs"]["parts"]
1400 .as_array()
1401 .unwrap()
1402 .iter()
1403 .map(|v| v.as_str().unwrap())
1404 .collect();
1405
1406 let codes: Vec<&str> = codes_json.iter().map(|v| v.as_str().unwrap()).collect();
1407
1408 let result = gen_mixed_code_v0(&codes, bits)
1409 .unwrap_or_else(|e| panic!("gen_mixed_code_v0 failed for {tc_name}: {e}"));
1410 assert_eq!(
1411 result.iscc, expected_iscc,
1412 "ISCC mismatch in test case {tc_name}"
1413 );
1414
1415 let result_parts: Vec<&str> = result.parts.iter().map(|s| s.as_str()).collect();
1417 assert_eq!(
1418 result_parts, expected_parts,
1419 "parts mismatch in test case {tc_name}"
1420 );
1421
1422 tested += 1;
1423 }
1424
1425 assert_eq!(tested, 2, "expected 2 conformance tests to run");
1426 }
1427
1428 #[test]
1429 fn test_gen_mixed_code_v0_too_few_codes() {
1430 assert!(matches!(
1431 gen_mixed_code_v0(&["EUA6GIKXN42IQV3S"], 64),
1432 Err(IsccError::InvalidInput(_))
1433 ));
1434 }
1435
1436 fn make_content_code_raw(stype: codec::SubType, bit_length: u32) -> Vec<u8> {
1438 let nbytes = (bit_length / 8) as usize;
1439 let body: Vec<u8> = (0..nbytes).map(|i| (i & 0xFF) as u8).collect();
1440 let base32 = codec::encode_component(
1441 codec::MainType::Content,
1442 stype,
1443 codec::Version::V0,
1444 bit_length,
1445 &body,
1446 )
1447 .unwrap();
1448 codec::decode_base32(&base32).unwrap()
1449 }
1450
1451 #[test]
1452 fn test_soft_hash_codes_v0_rejects_short_code() {
1453 let code_64 = make_content_code_raw(codec::SubType::None, 64);
1455 let code_32 = make_content_code_raw(codec::SubType::Image, 32);
1456 let result = soft_hash_codes_v0(&[code_64, code_32], 64);
1457 assert!(
1458 matches!(&result, Err(IsccError::InvalidInput(msg)) if msg.contains("too short")),
1459 "expected InvalidInput with 'too short', got {result:?}"
1460 );
1461 }
1462
1463 #[test]
1464 fn test_soft_hash_codes_v0_accepts_exact_length() {
1465 let code_a = make_content_code_raw(codec::SubType::None, 64);
1467 let code_b = make_content_code_raw(codec::SubType::Image, 64);
1468 let result = soft_hash_codes_v0(&[code_a, code_b], 64);
1469 assert!(result.is_ok(), "expected Ok, got {result:?}");
1470 }
1471
1472 #[test]
1473 fn test_soft_hash_codes_v0_accepts_longer_codes() {
1474 let code_a = make_content_code_raw(codec::SubType::None, 128);
1476 let code_b = make_content_code_raw(codec::SubType::Audio, 128);
1477 let result = soft_hash_codes_v0(&[code_a, code_b], 64);
1478 assert!(result.is_ok(), "expected Ok, got {result:?}");
1479 }
1480
1481 #[test]
1482 fn test_gen_data_code_v0_conformance() {
1483 let json_str = include_str!("../tests/data.json");
1484 let data: serde_json::Value = serde_json::from_str(json_str).unwrap();
1485 let section = &data["gen_data_code_v0"];
1486 let cases = section.as_object().unwrap();
1487
1488 let mut tested = 0;
1489
1490 for (tc_name, tc) in cases {
1491 let inputs = tc["inputs"].as_array().unwrap();
1492 let stream_str = inputs[0].as_str().unwrap();
1493 let bits = inputs[1].as_u64().unwrap() as u32;
1494 let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();
1495
1496 let hex_data = stream_str
1498 .strip_prefix("stream:")
1499 .unwrap_or_else(|| panic!("expected 'stream:' prefix in test case {tc_name}"));
1500 let input_bytes = hex::decode(hex_data)
1501 .unwrap_or_else(|e| panic!("invalid hex in test case {tc_name}: {e}"));
1502
1503 let result = gen_data_code_v0(&input_bytes, bits)
1504 .unwrap_or_else(|e| panic!("gen_data_code_v0 failed for {tc_name}: {e}"));
1505 assert_eq!(
1506 result.iscc, expected_iscc,
1507 "ISCC mismatch in test case {tc_name}"
1508 );
1509
1510 tested += 1;
1511 }
1512
1513 assert_eq!(tested, 4, "expected 4 conformance tests to run");
1514 }
1515
1516 #[test]
1517 fn test_gen_instance_code_v0_empty() {
1518 let result = gen_instance_code_v0(b"", 64).unwrap();
1519 assert_eq!(result.iscc, "ISCC:IAA26E2JXH27TING");
1520 assert_eq!(result.filesize, 0);
1521 assert_eq!(
1522 result.datahash,
1523 "1e20af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262"
1524 );
1525 }
1526
1527 #[test]
1528 fn test_gen_instance_code_v0_conformance() {
1529 let json_str = include_str!("../tests/data.json");
1530 let data: serde_json::Value = serde_json::from_str(json_str).unwrap();
1531 let section = &data["gen_instance_code_v0"];
1532 let cases = section.as_object().unwrap();
1533
1534 for (name, tc) in cases {
1535 let inputs = tc["inputs"].as_array().unwrap();
1536 let stream_str = inputs[0].as_str().unwrap();
1537 let bits = inputs[1].as_u64().unwrap() as u32;
1538 let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();
1539
1540 let hex_data = stream_str
1542 .strip_prefix("stream:")
1543 .unwrap_or_else(|| panic!("expected 'stream:' prefix in test case {name}"));
1544 let input_bytes = hex::decode(hex_data)
1545 .unwrap_or_else(|e| panic!("invalid hex in test case {name}: {e}"));
1546
1547 let result = gen_instance_code_v0(&input_bytes, bits)
1548 .unwrap_or_else(|e| panic!("gen_instance_code_v0 failed for {name}: {e}"));
1549 assert_eq!(
1550 result.iscc, expected_iscc,
1551 "ISCC mismatch in test case {name}"
1552 );
1553
1554 if let Some(expected_datahash) = tc["outputs"].get("datahash") {
1556 let expected_datahash = expected_datahash.as_str().unwrap();
1557 assert_eq!(
1558 result.datahash, expected_datahash,
1559 "datahash mismatch in test case {name}"
1560 );
1561 }
1562
1563 if let Some(expected_filesize) = tc["outputs"].get("filesize") {
1565 let expected_filesize = expected_filesize.as_u64().unwrap();
1566 assert_eq!(
1567 result.filesize, expected_filesize,
1568 "filesize mismatch in test case {name}"
1569 );
1570 }
1571
1572 assert_eq!(
1574 result.filesize,
1575 input_bytes.len() as u64,
1576 "filesize should match input length in test case {name}"
1577 );
1578 }
1579 }
1580
1581 #[test]
1582 fn test_gen_iscc_code_v0_conformance() {
1583 let json_str = include_str!("../tests/data.json");
1584 let data: serde_json::Value = serde_json::from_str(json_str).unwrap();
1585 let section = &data["gen_iscc_code_v0"];
1586 let cases = section.as_object().unwrap();
1587
1588 let mut tested = 0;
1589
1590 for (tc_name, tc) in cases {
1591 let inputs = tc["inputs"].as_array().unwrap();
1592 let codes_json = inputs[0].as_array().unwrap();
1593 let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();
1594
1595 let codes: Vec<&str> = codes_json.iter().map(|v| v.as_str().unwrap()).collect();
1596
1597 let result = gen_iscc_code_v0(&codes, false)
1598 .unwrap_or_else(|e| panic!("gen_iscc_code_v0 failed for {tc_name}: {e}"));
1599 assert_eq!(
1600 result.iscc, expected_iscc,
1601 "ISCC mismatch in test case {tc_name}"
1602 );
1603
1604 tested += 1;
1605 }
1606
1607 assert_eq!(tested, 5, "expected 5 conformance tests to run");
1608 }
1609
1610 #[test]
1611 fn test_gen_iscc_code_v0_too_few_codes() {
1612 assert!(matches!(
1613 gen_iscc_code_v0(&["AAAWKLHFPV6OPKDG"], false),
1614 Err(IsccError::InvalidInput(_))
1615 ));
1616 }
1617
1618 #[test]
1619 fn test_gen_iscc_code_v0_missing_instance() {
1620 assert!(matches!(
1622 gen_iscc_code_v0(&["AAAWKLHFPV6OPKDG", "AAAWKLHFPV6OPKDG"], false),
1623 Err(IsccError::InvalidInput(_))
1624 ));
1625 }
1626
1627 #[test]
1628 fn test_gen_iscc_code_v0_short_code() {
1629 assert!(matches!(
1631 gen_iscc_code_v0(&["AAAWKLHFPV6", "AAAWKLHFPV6OPKDG"], false),
1632 Err(IsccError::InvalidInput(_))
1633 ));
1634 }
1635
1636 #[test]
1643 fn test_gen_meta_code_empty_data_url_enters_meta_branch() {
1644 let result =
1645 gen_meta_code_v0("Test", None, Some("data:application/json;base64,"), 64).unwrap();
1646
1647 assert_eq!(result.name, "Test");
1649
1650 assert_eq!(
1652 result.meta,
1653 Some("data:application/json;base64,".to_string()),
1654 "empty Data-URL payload should still enter meta branch"
1655 );
1656
1657 let expected_metahash = utils::multi_hash_blake3(&[]);
1659 assert_eq!(
1660 result.metahash, expected_metahash,
1661 "metahash should be BLAKE3 of empty bytes"
1662 );
1663 }
1664
1665 #[test]
1671 fn test_soft_hash_meta_v0_with_bytes_empty_equals_name_only() {
1672 let name_only = soft_hash_meta_v0("test", None);
1673 let empty_bytes = soft_hash_meta_v0_with_bytes("test", &[]);
1674 assert_eq!(
1675 name_only, empty_bytes,
1676 "empty bytes should produce same digest as name-only (no interleaving)"
1677 );
1678 }
1679
1680 #[test]
1683 fn test_meta_trim_name_value() {
1684 assert_eq!(META_TRIM_NAME, 128);
1685 }
1686
1687 #[test]
1688 fn test_meta_trim_description_value() {
1689 assert_eq!(META_TRIM_DESCRIPTION, 4096);
1690 }
1691
1692 #[test]
1693 fn test_io_read_size_value() {
1694 assert_eq!(IO_READ_SIZE, 4_194_304);
1695 }
1696
1697 #[test]
1698 fn test_text_ngram_size_value() {
1699 assert_eq!(TEXT_NGRAM_SIZE, 13);
1700 }
1701
1702 #[test]
1706 fn test_encode_component_matches_codec() {
1707 let digest = [0xABu8; 8];
1708 let tier1 = encode_component(3, 0, 0, 64, &digest).unwrap();
1709 let tier2 = codec::encode_component(
1710 codec::MainType::Data,
1711 codec::SubType::None,
1712 codec::Version::V0,
1713 64,
1714 &digest,
1715 )
1716 .unwrap();
1717 assert_eq!(tier1, tier2);
1718 }
1719
1720 #[test]
1722 fn test_encode_component_round_trip() {
1723 let digest = [0x42u8; 32];
1724 let result = encode_component(0, 0, 0, 64, &digest).unwrap();
1725 assert!(!result.is_empty());
1727 }
1728
1729 #[test]
1731 fn test_encode_component_rejects_iscc() {
1732 let result = encode_component(5, 0, 0, 64, &[0u8; 8]);
1733 assert!(result.is_err());
1734 }
1735
1736 #[test]
1738 fn test_encode_component_rejects_short_digest() {
1739 let result = encode_component(0, 0, 0, 64, &[0u8; 4]);
1740 assert!(result.is_err());
1741 let err = result.unwrap_err().to_string();
1742 assert!(
1743 err.contains("digest length 4 < bit_length/8 (8)"),
1744 "unexpected error: {err}"
1745 );
1746 }
1747
1748 #[test]
1750 fn test_encode_component_rejects_invalid_mtype() {
1751 let result = encode_component(99, 0, 0, 64, &[0u8; 8]);
1752 assert!(result.is_err());
1753 }
1754
1755 #[test]
1757 fn test_encode_component_rejects_invalid_stype() {
1758 let result = encode_component(0, 99, 0, 64, &[0u8; 8]);
1759 assert!(result.is_err());
1760 }
1761
1762 #[test]
1764 fn test_encode_component_rejects_invalid_version() {
1765 let result = encode_component(0, 0, 99, 64, &[0u8; 8]);
1766 assert!(result.is_err());
1767 }
1768
1769 #[test]
1773 fn test_iscc_decode_round_trip_meta() {
1774 let digest = [0xaa_u8; 8];
1775 let encoded = encode_component(0, 0, 0, 64, &digest).unwrap();
1776 let (mt, st, vs, li, decoded_digest) = iscc_decode(&encoded).unwrap();
1777 assert_eq!(mt, 0, "MainType::Meta");
1778 assert_eq!(st, 0, "SubType::None");
1779 assert_eq!(vs, 0, "Version::V0");
1780 assert_eq!(li, 1, "length_index");
1782 assert_eq!(decoded_digest, digest.to_vec());
1783 }
1784
1785 #[test]
1787 fn test_iscc_decode_round_trip_content() {
1788 let digest = [0xbb_u8; 8];
1789 let encoded = encode_component(2, 0, 0, 64, &digest).unwrap();
1790 let (mt, st, vs, _li, decoded_digest) = iscc_decode(&encoded).unwrap();
1791 assert_eq!(mt, 2, "MainType::Content");
1792 assert_eq!(st, 0, "SubType::TEXT");
1793 assert_eq!(vs, 0, "Version::V0");
1794 assert_eq!(decoded_digest, digest.to_vec());
1795 }
1796
1797 #[test]
1799 fn test_iscc_decode_round_trip_data() {
1800 let digest = [0xcc_u8; 8];
1801 let encoded = encode_component(3, 0, 0, 64, &digest).unwrap();
1802 let (mt, _st, _vs, _li, decoded_digest) = iscc_decode(&encoded).unwrap();
1803 assert_eq!(mt, 3, "MainType::Data");
1804 assert_eq!(decoded_digest, digest.to_vec());
1805 }
1806
1807 #[test]
1809 fn test_iscc_decode_round_trip_instance() {
1810 let digest = [0xdd_u8; 8];
1811 let encoded = encode_component(4, 0, 0, 64, &digest).unwrap();
1812 let (mt, _st, _vs, _li, decoded_digest) = iscc_decode(&encoded).unwrap();
1813 assert_eq!(mt, 4, "MainType::Instance");
1814 assert_eq!(decoded_digest, digest.to_vec());
1815 }
1816
1817 #[test]
1819 fn test_iscc_decode_with_prefix() {
1820 let digest = [0xaa_u8; 8];
1821 let encoded = encode_component(0, 0, 0, 64, &digest).unwrap();
1822 let with_prefix = format!("ISCC:{encoded}");
1823 let (mt, st, vs, li, decoded_digest) = iscc_decode(&with_prefix).unwrap();
1824 assert_eq!(mt, 0);
1825 assert_eq!(st, 0);
1826 assert_eq!(vs, 0);
1827 assert_eq!(li, 1);
1828 assert_eq!(decoded_digest, digest.to_vec());
1829 }
1830
1831 #[test]
1833 fn test_iscc_decode_with_dashes() {
1834 let digest = [0xaa_u8; 8];
1835 let encoded = encode_component(0, 0, 0, 64, &digest).unwrap();
1836 let with_dashes = format!("{}-{}-{}", &encoded[..4], &encoded[4..8], &encoded[8..]);
1838 let (mt, st, vs, li, decoded_digest) = iscc_decode(&with_dashes).unwrap();
1839 assert_eq!(mt, 0);
1840 assert_eq!(st, 0);
1841 assert_eq!(vs, 0);
1842 assert_eq!(li, 1);
1843 assert_eq!(decoded_digest, digest.to_vec());
1844 }
1845
1846 #[test]
1848 fn test_iscc_decode_invalid_base32() {
1849 let result = iscc_decode("!!!INVALID!!!");
1850 assert!(result.is_err());
1851 let err = result.unwrap_err().to_string();
1852 assert!(err.contains("base32"), "expected base32 error: {err}");
1853 }
1854
1855 #[test]
1858 fn test_iscc_decode_known_meta_code() {
1859 let (mt, st, vs, li, digest) = iscc_decode("ISCC:AAAZXZ6OU74YAZIM").unwrap();
1860 assert_eq!(mt, 0, "MainType::Meta");
1861 assert_eq!(st, 0, "SubType::None");
1862 assert_eq!(vs, 0, "Version::V0");
1863 assert_eq!(li, 1, "length_index for 64-bit");
1864 assert_eq!(digest.len(), 8, "64-bit = 8 bytes");
1865 }
1866
1867 #[test]
1870 fn test_iscc_decode_known_instance_code() {
1871 let (mt, st, vs, li, digest) = iscc_decode("ISCC:IAA26E2JXH27TING").unwrap();
1872 assert_eq!(mt, 4, "MainType::Instance");
1873 assert_eq!(st, 0, "SubType::None");
1874 assert_eq!(vs, 0, "Version::V0");
1875 assert_eq!(li, 1, "length_index for 64-bit");
1876 assert_eq!(digest.len(), 8, "64-bit = 8 bytes");
1877 }
1878
1879 #[test]
1882 fn test_iscc_decode_known_data_code() {
1883 let (mt, st, vs, _li, digest) = iscc_decode("ISCC:GAAXL2XYM5BQIAZ3").unwrap();
1884 assert_eq!(mt, 3, "MainType::Data");
1885 assert_eq!(st, 0, "SubType::None");
1886 assert_eq!(vs, 0, "Version::V0");
1887 assert_eq!(digest.len(), 8, "64-bit = 8 bytes");
1888 }
1889
1890 #[test]
1893 fn test_iscc_decode_verification_round_trip() {
1894 let digest = [0xaa_u8; 8];
1895 let encoded = encode_component(0, 0, 0, 64, &digest).unwrap();
1896 let result = iscc_decode(&encoded).unwrap();
1897 assert_eq!(result, (0, 0, 0, 1, vec![0xaa; 8]));
1898 }
1899
1900 #[test]
1902 fn test_iscc_decode_truncated_input() {
1903 let digest = [0xff_u8; 32];
1905 let encoded = encode_component(0, 0, 0, 256, &digest).unwrap();
1906 let truncated = &encoded[..6];
1908 let result = iscc_decode(truncated);
1909 assert!(result.is_err(), "should fail on truncated input");
1910 }
1911
1912 #[test]
1916 fn test_json_to_data_url_basic() {
1917 let url = json_to_data_url(r#"{"key": "value"}"#).unwrap();
1918 assert!(
1919 url.starts_with("data:application/json;base64,"),
1920 "expected application/json prefix, got: {url}"
1921 );
1922 }
1923
1924 #[test]
1926 fn test_json_to_data_url_ld_json() {
1927 let url = json_to_data_url(r#"{"@context": "https://schema.org"}"#).unwrap();
1928 assert!(
1929 url.starts_with("data:application/ld+json;base64,"),
1930 "expected application/ld+json prefix, got: {url}"
1931 );
1932 }
1933
1934 #[test]
1936 fn test_json_to_data_url_jcs_ordering() {
1937 let url = json_to_data_url(r#"{"b":1,"a":2}"#).unwrap();
1938 let b64 = url.split_once(',').unwrap().1;
1940 let decoded = data_encoding::BASE64.decode(b64.as_bytes()).unwrap();
1941 let canonical = std::str::from_utf8(&decoded).unwrap();
1942 assert_eq!(canonical, r#"{"a":2,"b":1}"#, "JCS should sort keys");
1943 }
1944
1945 #[test]
1948 fn test_json_to_data_url_round_trip() {
1949 let input = r#"{"hello": "world", "num": 42}"#;
1950 let url = json_to_data_url(input).unwrap();
1951 let decoded_bytes = decode_data_url(&url).unwrap();
1952 let canonical: serde_json::Value =
1954 serde_json::from_slice(&decoded_bytes).expect("decoded bytes should be valid JSON");
1955 let original: serde_json::Value = serde_json::from_str(input).unwrap();
1956 assert_eq!(canonical, original, "round-trip preserves JSON semantics");
1957 }
1958
1959 #[test]
1961 fn test_json_to_data_url_invalid_json() {
1962 let result = json_to_data_url("not json");
1963 assert!(result.is_err(), "should reject invalid JSON");
1964 let err = result.unwrap_err().to_string();
1965 assert!(
1966 err.contains("invalid JSON"),
1967 "expected 'invalid JSON' in error: {err}"
1968 );
1969 }
1970
1971 #[test]
1984 fn test_json_to_data_url_conformance_0016() {
1985 let url = json_to_data_url(r#"{"some": "object"}"#).unwrap();
1986 assert!(
1988 url.starts_with("data:application/json;base64,"),
1989 "expected application/json prefix"
1990 );
1991 let b64 = url.split_once(',').unwrap().1;
1993 let decoded = data_encoding::BASE64.decode(b64.as_bytes()).unwrap();
1994 let canonical = std::str::from_utf8(&decoded).unwrap();
1995 assert_eq!(
1996 canonical, r#"{"some":"object"}"#,
1997 "JCS removes whitespace from JSON"
1998 );
1999 }
2000}