1pub mod cdc;
8pub mod codec;
9pub mod conformance;
10pub(crate) mod dct;
11pub mod minhash;
12pub mod simhash;
13pub mod streaming;
14pub mod types;
15pub mod utils;
16pub(crate) mod wtahash;
17
18pub use cdc::alg_cdc_chunks;
19pub use codec::encode_base64;
20pub use codec::iscc_decompose;
21pub use conformance::conformance_selftest;
22pub use minhash::alg_minhash_256;
23pub use simhash::{alg_simhash, sliding_window};
24pub use streaming::{DataHasher, InstanceHasher};
25pub use types::*;
26pub use utils::{text_clean, text_collapse, text_remove_newlines, text_trim};
27
/// Limit passed to `utils::text_trim` for the normalized `name` in `gen_meta_code_v0`.
pub const META_TRIM_NAME: usize = 128;

/// Limit passed to `utils::text_trim` for the cleaned `description` in `gen_meta_code_v0`.
pub const META_TRIM_DESCRIPTION: usize = 4096;

/// Maximum accepted size, in bytes, of the decoded `meta` payload in `gen_meta_code_v0`.
pub const META_TRIM_META: usize = 128_000;

/// Size in bytes of the read buffer allocated by `gen_sum_code_v0` (4 MiB).
pub const IO_READ_SIZE: usize = 4_194_304;

/// Window width passed to `simhash::sliding_window_strs` in `soft_hash_text_v0`.
pub const TEXT_NGRAM_SIZE: usize = 13;
42
/// Error type returned by the fallible functions of this module.
#[derive(Debug, thiserror::Error)]
pub enum IsccError {
    /// The supplied input could not be processed; the string carries the detail and is
    /// rendered as `invalid input: <detail>`.
    #[error("invalid input: {0}")]
    InvalidInput(String),
}
50
/// Shorthand for `Result<T, IsccError>`.
pub type IsccResult<T> = Result<T, IsccError>;
53
/// Interleaves the first 16 bytes of `a` and `b` in 4-byte chunks.
///
/// The 32-byte result is laid out as `a[0..4] b[0..4] a[4..8] b[4..8] ...`.
///
/// # Panics
///
/// Panics if either input is shorter than 16 bytes.
fn interleave_digests(a: &[u8], b: &[u8]) -> Vec<u8> {
    let mut mixed = Vec::with_capacity(32);
    for offset in (0..16).step_by(4) {
        mixed.extend_from_slice(&a[offset..offset + 4]);
        mixed.extend_from_slice(&b[offset..offset + 4]);
    }
    mixed
}
70
71fn meta_name_simhash(name: &str) -> Vec<u8> {
76 let collapsed_name = utils::text_collapse(name);
77 let name_ngrams = simhash::sliding_window_strs(&collapsed_name, 3);
78 let name_hashes: Vec<[u8; 32]> = name_ngrams
79 .iter()
80 .map(|ng| *blake3::hash(ng.as_bytes()).as_bytes())
81 .collect();
82 simhash::alg_simhash_inner(&name_hashes)
83}
84
85fn soft_hash_meta_v0(name: &str, extra: Option<&str>) -> Vec<u8> {
90 let name_simhash = meta_name_simhash(name);
91
92 match extra {
93 None | Some("") => name_simhash,
94 Some(extra_str) => {
95 let collapsed_extra = utils::text_collapse(extra_str);
96 let extra_ngrams = simhash::sliding_window_strs(&collapsed_extra, 3);
97 let extra_hashes: Vec<[u8; 32]> = extra_ngrams
98 .iter()
99 .map(|ng| *blake3::hash(ng.as_bytes()).as_bytes())
100 .collect();
101 let extra_simhash = simhash::alg_simhash_inner(&extra_hashes);
102
103 interleave_digests(&name_simhash, &extra_simhash)
104 }
105 }
106}
107
108fn soft_hash_meta_v0_with_bytes(name: &str, extra: &[u8]) -> Vec<u8> {
114 let name_simhash = meta_name_simhash(name);
115
116 if extra.is_empty() {
117 return name_simhash;
118 }
119
120 let byte_ngrams = simhash::sliding_window_bytes(extra, 4);
121 let byte_hashes: Vec<[u8; 32]> = byte_ngrams
122 .iter()
123 .map(|ng| *blake3::hash(ng).as_bytes())
124 .collect();
125 let byte_simhash = simhash::alg_simhash_inner(&byte_hashes);
126
127 interleave_digests(&name_simhash, &byte_simhash)
128}
129
130fn decode_data_url(data_url: &str) -> IsccResult<Vec<u8>> {
136 let payload_b64 = data_url
137 .split_once(',')
138 .map(|(_, b64)| b64)
139 .ok_or_else(|| IsccError::InvalidInput("Data-URL missing comma separator".into()))?;
140 data_encoding::BASE64
141 .decode(payload_b64.as_bytes())
142 .map_err(|e| IsccError::InvalidInput(format!("invalid base64 in Data-URL: {e}")))
143}
144
145fn parse_meta_json(meta_str: &str) -> IsccResult<Vec<u8>> {
147 let parsed: serde_json::Value = serde_json::from_str(meta_str)
148 .map_err(|e| IsccError::InvalidInput(format!("invalid JSON in meta: {e}")))?;
149 let mut buf = Vec::new();
150 serde_json_canonicalizer::to_writer(&parsed, &mut buf)
151 .map_err(|e| IsccError::InvalidInput(format!("JSON canonicalization failed: {e}")))?;
152 Ok(buf)
153}
154
155fn build_meta_data_url(json_bytes: &[u8], json_value: &serde_json::Value) -> String {
160 let media_type = if json_value.get("@context").is_some() {
161 "application/ld+json"
162 } else {
163 "application/json"
164 };
165 let b64 = data_encoding::BASE64.encode(json_bytes);
166 format!("data:{media_type};base64,{b64}")
167}
168
169pub fn encode_component(
179 mtype: u8,
180 stype: u8,
181 version: u8,
182 bit_length: u32,
183 digest: &[u8],
184) -> IsccResult<String> {
185 let mt = codec::MainType::try_from(mtype)?;
186 let st = codec::SubType::try_from(stype)?;
187 let vs = codec::Version::try_from(version)?;
188 let needed = (bit_length / 8) as usize;
189 if digest.len() < needed {
190 return Err(IsccError::InvalidInput(format!(
191 "digest length {} < bit_length/8 ({})",
192 digest.len(),
193 needed
194 )));
195 }
196 codec::encode_component(mt, st, vs, bit_length, digest)
197}
198
199pub fn iscc_decode(iscc: &str) -> IsccResult<(u8, u8, u8, u8, Vec<u8>)> {
214 let clean = iscc.strip_prefix("ISCC:").unwrap_or(iscc);
216 let clean = clean.replace('-', "");
218 let raw = codec::decode_base32(&clean)?;
219 let (mt, st, vs, length_index, tail) = codec::decode_header(&raw)?;
220 let bit_length = codec::decode_length(mt, length_index, st);
221 let nbytes = (bit_length / 8) as usize;
222 if tail.len() < nbytes {
223 return Err(IsccError::InvalidInput(format!(
224 "decoded body too short: expected {nbytes} digest bytes, got {}",
225 tail.len()
226 )));
227 }
228 Ok((
229 mt as u8,
230 st as u8,
231 vs as u8,
232 length_index as u8,
233 tail[..nbytes].to_vec(),
234 ))
235}
236
237pub fn json_to_data_url(json: &str) -> IsccResult<String> {
264 let parsed: serde_json::Value = serde_json::from_str(json)
265 .map_err(|e| IsccError::InvalidInput(format!("invalid JSON: {e}")))?;
266 let mut canonical_bytes = Vec::new();
267 serde_json_canonicalizer::to_writer(&parsed, &mut canonical_bytes)
268 .map_err(|e| IsccError::InvalidInput(format!("JSON canonicalization failed: {e}")))?;
269 Ok(build_meta_data_url(&canonical_bytes, &parsed))
270}
271
272pub fn gen_meta_code_v0(
280 name: &str,
281 description: Option<&str>,
282 meta: Option<&str>,
283 bits: u32,
284) -> IsccResult<MetaCodeResult> {
285 let name = utils::text_clean(name);
287 let name = utils::text_remove_newlines(&name);
288 let name = utils::text_trim(&name, META_TRIM_NAME);
289
290 if name.is_empty() {
291 return Err(IsccError::InvalidInput(
292 "name is empty after normalization".into(),
293 ));
294 }
295
296 let desc_str = description.unwrap_or("");
298 let desc_clean = utils::text_clean(desc_str);
299 let desc_clean = utils::text_trim(&desc_clean, META_TRIM_DESCRIPTION);
300
301 if let Some(meta_str) = meta {
303 const PRE_DECODE_LIMIT: usize = META_TRIM_META * 4 / 3 + 256;
304 if meta_str.len() > PRE_DECODE_LIMIT {
305 return Err(IsccError::InvalidInput(format!(
306 "meta string exceeds size limit ({} > {PRE_DECODE_LIMIT} bytes)",
307 meta_str.len()
308 )));
309 }
310 }
311
312 let meta_payload: Option<Vec<u8>> = match meta {
314 Some(meta_str) if meta_str.starts_with("data:") => Some(decode_data_url(meta_str)?),
315 Some(meta_str) => Some(parse_meta_json(meta_str)?),
316 None => None,
317 };
318
319 if let Some(ref payload) = meta_payload {
321 if payload.len() > META_TRIM_META {
322 return Err(IsccError::InvalidInput(format!(
323 "decoded meta payload exceeds size limit ({} > {META_TRIM_META} bytes)",
324 payload.len()
325 )));
326 }
327 }
328
329 if let Some(ref payload) = meta_payload {
331 let meta_code_digest = soft_hash_meta_v0_with_bytes(&name, payload);
332 let metahash = utils::multi_hash_blake3(payload);
333
334 let meta_code = codec::encode_component(
335 codec::MainType::Meta,
336 codec::SubType::None,
337 codec::Version::V0,
338 bits,
339 &meta_code_digest,
340 )?;
341
342 let meta_value = match meta {
344 Some(meta_str) if meta_str.starts_with("data:") => meta_str.to_string(),
345 Some(meta_str) => {
346 let parsed: serde_json::Value = serde_json::from_str(meta_str)
347 .map_err(|e| IsccError::InvalidInput(format!("invalid JSON: {e}")))?;
348 build_meta_data_url(payload, &parsed)
349 }
350 None => unreachable!(),
351 };
352
353 Ok(MetaCodeResult {
354 iscc: format!("ISCC:{meta_code}"),
355 name: name.clone(),
356 description: if desc_clean.is_empty() {
357 None
358 } else {
359 Some(desc_clean)
360 },
361 meta: Some(meta_value),
362 metahash,
363 })
364 } else {
365 let payload = if desc_clean.is_empty() {
367 name.clone()
368 } else {
369 format!("{name} {desc_clean}")
370 };
371 let payload = payload.trim().to_string();
372 let metahash = utils::multi_hash_blake3(payload.as_bytes());
373
374 let extra = if desc_clean.is_empty() {
376 None
377 } else {
378 Some(desc_clean.as_str())
379 };
380 let meta_code_digest = soft_hash_meta_v0(&name, extra);
381
382 let meta_code = codec::encode_component(
383 codec::MainType::Meta,
384 codec::SubType::None,
385 codec::Version::V0,
386 bits,
387 &meta_code_digest,
388 )?;
389
390 Ok(MetaCodeResult {
391 iscc: format!("ISCC:{meta_code}"),
392 name: name.clone(),
393 description: if desc_clean.is_empty() {
394 None
395 } else {
396 Some(desc_clean)
397 },
398 meta: None,
399 metahash,
400 })
401 }
402}
403
404fn soft_hash_text_v0(text: &str) -> Vec<u8> {
409 let ngrams = simhash::sliding_window_strs(text, TEXT_NGRAM_SIZE);
410 let features: Vec<u32> = ngrams
411 .iter()
412 .map(|ng| xxhash_rust::xxh32::xxh32(ng.as_bytes(), 0))
413 .collect();
414 minhash::alg_minhash_256(&features)
415}
416
417pub fn gen_text_code_v0(text: &str, bits: u32) -> IsccResult<TextCodeResult> {
423 let collapsed = utils::text_collapse(text);
424 let characters = collapsed.chars().count();
425 let hash_digest = soft_hash_text_v0(&collapsed);
426 let component = codec::encode_component(
427 codec::MainType::Content,
428 codec::SubType::TEXT,
429 codec::Version::V0,
430 bits,
431 &hash_digest,
432 )?;
433 Ok(TextCodeResult {
434 iscc: format!("ISCC:{component}"),
435 characters,
436 })
437}
438
/// Returns the transpose of a row-major matrix.
///
/// The output has as many rows as the first input row has columns. An empty input yields
/// an empty output.
///
/// # Panics
///
/// Panics if any row is longer than the first row.
fn transpose_matrix(matrix: &[Vec<f64>]) -> Vec<Vec<f64>> {
    let width = match matrix.first() {
        Some(first_row) => first_row.len(),
        None => return Vec::new(),
    };
    let height = matrix.len();

    let mut transposed = vec![vec![0.0f64; height]; width];
    for (r, source_row) in matrix.iter().enumerate() {
        for (c, value) in source_row.iter().copied().enumerate() {
            transposed[c][r] = value;
        }
    }
    transposed
}
454
/// Flattens an up-to-8x8 block of `matrix` into a row-major vector.
///
/// The block starts at row `row` and column `col`. Rows or columns beyond the matrix
/// bounds are simply absent from the result, so it may hold fewer than 64 values.
fn flatten_8x8(matrix: &[Vec<f64>], col: usize, row: usize) -> Vec<f64> {
    matrix
        .iter()
        .skip(row)
        .take(8)
        .flat_map(|line| line.iter().skip(col).take(8).copied())
        .collect()
}
468
/// Returns the median of `values`.
///
/// For an even number of values the mean of the two middle values is returned. Values are
/// ordered with `f64::total_cmp`, so NaN inputs are ordered deterministically and no
/// longer abort the sort.
///
/// # Panics
///
/// Panics if `values` is empty.
fn compute_median(values: &[f64]) -> f64 {
    assert!(
        !values.is_empty(),
        "compute_median requires at least one value"
    );

    let mut sorted = values.to_vec();
    // Equal floats are indistinguishable for the median, so an unstable sort is enough.
    sorted.sort_unstable_by(f64::total_cmp);

    let mid = sorted.len() / 2;
    if sorted.len() % 2 == 1 {
        sorted[mid]
    } else {
        (sorted[mid - 1] + sorted[mid]) / 2.0
    }
}
483
/// Packs booleans into bytes, most significant bit first.
///
/// A trailing group of fewer than 8 bits fills the high bits of the last byte; the
/// remaining low bits are zero.
fn bits_to_bytes(bits: &[bool]) -> Vec<u8> {
    let mut packed = Vec::with_capacity((bits.len() + 7) / 8);
    for group in bits.chunks(8) {
        let byte = group
            .iter()
            .enumerate()
            .fold(0u8, |acc, (pos, &set)| if set { acc | (0x80u8 >> pos) } else { acc });
        packed.push(byte);
    }
    packed
}
498
499fn soft_hash_image_v0(pixels: &[u8], bits: u32) -> IsccResult<Vec<u8>> {
505 if pixels.len() != 1024 {
506 return Err(IsccError::InvalidInput(format!(
507 "expected 1024 pixels, got {}",
508 pixels.len()
509 )));
510 }
511 if bits > 256 {
512 return Err(IsccError::InvalidInput(format!(
513 "bits must be <= 256, got {bits}"
514 )));
515 }
516
517 let rows: Vec<Vec<f64>> = pixels
519 .chunks(32)
520 .map(|row| {
521 let row_f64: Vec<f64> = row.iter().map(|&p| p as f64).collect();
522 dct::alg_dct(&row_f64)
523 })
524 .collect::<IsccResult<Vec<Vec<f64>>>>()?;
525
526 let transposed = transpose_matrix(&rows);
528
529 let dct_cols: Vec<Vec<f64>> = transposed
531 .iter()
532 .map(|col| dct::alg_dct(col))
533 .collect::<IsccResult<Vec<Vec<f64>>>>()?;
534
535 let dct_matrix = transpose_matrix(&dct_cols);
537
538 let positions = [(0, 0), (1, 0), (0, 1), (1, 1)];
540 let mut bitstring = Vec::<bool>::with_capacity(256);
541
542 for (col, row) in positions {
543 let flat = flatten_8x8(&dct_matrix, col, row);
544 let median = compute_median(&flat);
545 for val in &flat {
546 bitstring.push(*val > median);
547 }
548 if bitstring.len() >= bits as usize {
549 break;
550 }
551 }
552
553 Ok(bits_to_bytes(&bitstring[..bits as usize]))
555}
556
557pub fn gen_image_code_v0(pixels: &[u8], bits: u32) -> IsccResult<ImageCodeResult> {
563 let hash_digest = soft_hash_image_v0(pixels, bits)?;
564 let component = codec::encode_component(
565 codec::MainType::Content,
566 codec::SubType::Image,
567 codec::Version::V0,
568 bits,
569 &hash_digest,
570 )?;
571 Ok(ImageCodeResult {
572 iscc: format!("ISCC:{component}"),
573 })
574}
575
/// Splits `slice` into `n` consecutive parts of near-equal length.
///
/// The first `slice.len() % n` parts hold one extra element. Parts may be empty when `n`
/// exceeds the slice length. `n == 0` yields an empty vector.
fn array_split<T>(slice: &[T], n: usize) -> Vec<&[T]> {
    if n == 0 {
        return Vec::new();
    }

    let quotient = slice.len() / n;
    let extra = slice.len() % n;

    let mut pieces = Vec::with_capacity(n);
    let mut rest = slice;
    for idx in 0..n {
        let width = if idx < extra { quotient + 1 } else { quotient };
        let (head, tail) = rest.split_at(width);
        pieces.push(head);
        rest = tail;
    }
    pieces
}
597
598fn soft_hash_audio_v0(cv: &[i32]) -> Vec<u8> {
605 let digests: Vec<[u8; 4]> = cv.iter().map(|&v| v.to_be_bytes()).collect();
607
608 if digests.is_empty() {
609 return vec![0u8; 32];
610 }
611
612 let mut parts: Vec<u8> = simhash::alg_simhash_inner(&digests);
614
615 let quarters = array_split(&digests, 4);
617 for quarter in &quarters {
618 if quarter.is_empty() {
619 parts.extend_from_slice(&[0u8; 4]);
620 } else {
621 parts.extend_from_slice(&simhash::alg_simhash_inner(quarter));
622 }
623 }
624
625 let mut sorted_values: Vec<i32> = cv.to_vec();
627 sorted_values.sort();
628 let sorted_digests: Vec<[u8; 4]> = sorted_values.iter().map(|&v| v.to_be_bytes()).collect();
629 let thirds = array_split(&sorted_digests, 3);
630 for third in &thirds {
631 if third.is_empty() {
632 parts.extend_from_slice(&[0u8; 4]);
633 } else {
634 parts.extend_from_slice(&simhash::alg_simhash_inner(third));
635 }
636 }
637
638 parts
639}
640
641pub fn gen_audio_code_v0(cv: &[i32], bits: u32) -> IsccResult<AudioCodeResult> {
646 let hash_digest = soft_hash_audio_v0(cv);
647 let component = codec::encode_component(
648 codec::MainType::Content,
649 codec::SubType::Audio,
650 codec::Version::V0,
651 bits,
652 &hash_digest,
653 )?;
654 Ok(AudioCodeResult {
655 iscc: format!("ISCC:{component}"),
656 })
657}
658
659pub fn soft_hash_video_v0<S: AsRef<[i32]> + Ord>(
664 frame_sigs: &[S],
665 bits: u32,
666) -> IsccResult<Vec<u8>> {
667 if frame_sigs.is_empty() {
668 return Err(IsccError::InvalidInput(
669 "frame_sigs must not be empty".into(),
670 ));
671 }
672
673 let unique: std::collections::BTreeSet<&S> = frame_sigs.iter().collect();
675
676 let cols = frame_sigs[0].as_ref().len();
678 let mut vecsum = vec![0i64; cols];
679 for sig in &unique {
680 for (c, &val) in sig.as_ref().iter().enumerate() {
681 vecsum[c] += val as i64;
682 }
683 }
684
685 wtahash::alg_wtahash(&vecsum, bits)
686}
687
688pub fn gen_video_code_v0<S: AsRef<[i32]> + Ord>(
693 frame_sigs: &[S],
694 bits: u32,
695) -> IsccResult<VideoCodeResult> {
696 let digest = soft_hash_video_v0(frame_sigs, bits)?;
697 let component = codec::encode_component(
698 codec::MainType::Content,
699 codec::SubType::Video,
700 codec::Version::V0,
701 bits,
702 &digest,
703 )?;
704 Ok(VideoCodeResult {
705 iscc: format!("ISCC:{component}"),
706 })
707}
708
709fn soft_hash_codes_v0(cc_digests: &[Vec<u8>], bits: u32) -> IsccResult<Vec<u8>> {
716 if cc_digests.len() < 2 {
717 return Err(IsccError::InvalidInput(
718 "at least 2 Content-Codes required for mixing".into(),
719 ));
720 }
721
722 let nbytes = (bits / 8) as usize;
723 let mut prepared: Vec<Vec<u8>> = Vec::with_capacity(cc_digests.len());
724
725 for raw in cc_digests {
726 let (mtype, stype, _ver, blen, body) = codec::decode_header(raw)?;
727 if mtype != codec::MainType::Content {
728 return Err(IsccError::InvalidInput(
729 "all codes must be Content-Codes".into(),
730 ));
731 }
732 let unit_bits = codec::decode_length(mtype, blen, stype);
733 if unit_bits < bits {
734 return Err(IsccError::InvalidInput(format!(
735 "Content-Code too short for {bits}-bit length (has {unit_bits} bits)"
736 )));
737 }
738 let mut entry = Vec::with_capacity(nbytes);
739 entry.push(raw[0]); let take = std::cmp::min(nbytes - 1, body.len());
741 entry.extend_from_slice(&body[..take]);
742 while entry.len() < nbytes {
744 entry.push(0);
745 }
746 prepared.push(entry);
747 }
748
749 Ok(simhash::alg_simhash_inner(&prepared))
750}
751
752pub fn gen_mixed_code_v0(codes: &[&str], bits: u32) -> IsccResult<MixedCodeResult> {
758 let decoded: Vec<Vec<u8>> = codes
759 .iter()
760 .map(|code| {
761 let clean = code.strip_prefix("ISCC:").unwrap_or(code);
762 codec::decode_base32(clean)
763 })
764 .collect::<IsccResult<Vec<Vec<u8>>>>()?;
765
766 let digest = soft_hash_codes_v0(&decoded, bits)?;
767
768 let component = codec::encode_component(
769 codec::MainType::Content,
770 codec::SubType::Mixed,
771 codec::Version::V0,
772 bits,
773 &digest,
774 )?;
775
776 Ok(MixedCodeResult {
777 iscc: format!("ISCC:{component}"),
778 parts: codes.iter().map(|s| s.to_string()).collect(),
779 })
780}
781
782pub fn gen_data_code_v0(data: &[u8], bits: u32) -> IsccResult<DataCodeResult> {
788 let chunks = cdc::alg_cdc_chunks(data, false, cdc::DATA_AVG_CHUNK_SIZE);
789 let mut features: Vec<u32> = chunks
790 .iter()
791 .map(|chunk| xxhash_rust::xxh32::xxh32(chunk, 0))
792 .collect();
793
794 if features.is_empty() {
796 features.push(xxhash_rust::xxh32::xxh32(b"", 0));
797 }
798
799 let digest = minhash::alg_minhash_256(&features);
800 let component = codec::encode_component(
801 codec::MainType::Data,
802 codec::SubType::None,
803 codec::Version::V0,
804 bits,
805 &digest,
806 )?;
807
808 Ok(DataCodeResult {
809 iscc: format!("ISCC:{component}"),
810 })
811}
812
813pub fn gen_instance_code_v0(data: &[u8], bits: u32) -> IsccResult<InstanceCodeResult> {
818 let digest = blake3::hash(data);
819 let datahash = utils::multi_hash_blake3(data);
820 let filesize = data.len() as u64;
821 let component = codec::encode_component(
822 codec::MainType::Instance,
823 codec::SubType::None,
824 codec::Version::V0,
825 bits,
826 digest.as_bytes(),
827 )?;
828 Ok(InstanceCodeResult {
829 iscc: format!("ISCC:{component}"),
830 datahash,
831 filesize,
832 })
833}
834
/// Combines several ISCC unit codes into one composite ISCC-CODE.
///
/// `codes` may carry an optional `ISCC:` prefix. The units are ordered by main type; after
/// ordering, the last two must be a Data-Code and an Instance-Code. With `wide` set, and
/// exactly those two units each at least 128 bits long, 16 bytes per unit are used instead
/// of 8.
///
/// # Errors
///
/// Returns `IsccError::InvalidInput` when fewer than two codes are given, when a code has
/// fewer than 16 characters, when Data-Code or Instance-Code is missing, or when the
/// Content/Semantic units disagree on their sub type. Errors from the `codec` decode and
/// encode helpers are propagated.
pub fn gen_iscc_code_v0(codes: &[&str], wide: bool) -> IsccResult<IsccCodeResult> {
    // Strip the optional scheme prefix from every code.
    let cleaned: Vec<&str> = codes
        .iter()
        .map(|c| c.strip_prefix("ISCC:").unwrap_or(c))
        .collect();

    if cleaned.len() < 2 {
        return Err(IsccError::InvalidInput(
            "at least 2 ISCC unit codes required".into(),
        ));
    }

    // All length checks run before any decoding, so a short code is reported even if an
    // earlier code would fail to decode.
    for code in &cleaned {
        if code.len() < 16 {
            return Err(IsccError::InvalidInput(format!(
                "ISCC unit code too short (min 16 chars): {code}"
            )));
        }
    }

    // Decode every unit into (main type, sub type, version, length index, body).
    let mut decoded: Vec<(
        codec::MainType,
        codec::SubType,
        codec::Version,
        u32,
        Vec<u8>,
    )> = Vec::with_capacity(cleaned.len());
    for code in &cleaned {
        let raw = codec::decode_base32(code)?;
        let header = codec::decode_header(&raw)?;
        decoded.push(header);
    }

    // Stable sort by main type; units with the same main type keep their input order.
    decoded.sort_by_key(|&(mt, ..)| mt);

    let main_types: Vec<codec::MainType> = decoded.iter().map(|&(mt, ..)| mt).collect();

    // `n >= 2` is guaranteed by the length check above, so `n - 2` cannot underflow.
    let n = main_types.len();
    if main_types[n - 2] != codec::MainType::Data || main_types[n - 1] != codec::MainType::Instance
    {
        return Err(IsccError::InvalidInput(
            "Data-Code and Instance-Code are mandatory".into(),
        ));
    }

    // Wide mode applies only to exactly Data + Instance, both at least 128 bits long.
    let is_wide = wide
        && decoded.len() == 2
        && main_types == [codec::MainType::Data, codec::MainType::Instance]
        && decoded
            .iter()
            .all(|&(mt, st, _, len, _)| codec::decode_length(mt, len, st) >= 128);

    // Pick the sub type of the composite code.
    let st = if is_wide {
        codec::SubType::Wide
    } else {
        // Sub types of all Semantic and Content units; they must agree.
        let sc_subtypes: Vec<codec::SubType> = decoded
            .iter()
            .filter(|&&(mt, ..)| mt == codec::MainType::Semantic || mt == codec::MainType::Content)
            .map(|&(_, st, ..)| st)
            .collect();

        if !sc_subtypes.is_empty() {
            let first = sc_subtypes[0];
            if sc_subtypes.iter().all(|&s| s == first) {
                first
            } else {
                return Err(IsccError::InvalidInput(
                    "mixed SubTypes among Content/Semantic units".into(),
                ));
            }
        } else if decoded.len() == 2 {
            codec::SubType::Sum
        } else {
            codec::SubType::IsccNone
        }
    };

    // Everything before the mandatory Data and Instance units is optional.
    // NOTE(review): `encode_units` presumably maps the optional unit combination to the
    // header length field; confirm against `codec`.
    let optional_types = &main_types[..n - 2];
    let encoded_length = codec::encode_units(optional_types)?;

    // Concatenate the leading bytes of every unit body, in sorted order. A body shorter
    // than `bytes_per_unit` contributes all of its bytes.
    let bytes_per_unit = if is_wide { 16 } else { 8 };
    let mut digest = Vec::with_capacity(decoded.len() * bytes_per_unit);
    for (_, _, _, _, tail) in &decoded {
        let take = bytes_per_unit.min(tail.len());
        digest.extend_from_slice(&tail[..take]);
    }

    let header = codec::encode_header(
        codec::MainType::Iscc,
        st,
        codec::Version::V0,
        encoded_length,
    )?;
    let mut code_bytes = header;
    code_bytes.extend_from_slice(&digest);
    let code = codec::encode_base32(&code_bytes);

    Ok(IsccCodeResult {
        iscc: format!("ISCC:{code}"),
    })
}
959
960pub fn gen_sum_code_v0(path: &std::path::Path, bits: u32, wide: bool) -> IsccResult<SumCodeResult> {
968 use std::io::Read;
969
970 let mut file = std::fs::File::open(path)
971 .map_err(|e| IsccError::InvalidInput(format!("Cannot open file: {e}")))?;
972
973 let mut data_hasher = streaming::DataHasher::new();
974 let mut instance_hasher = streaming::InstanceHasher::new();
975
976 let mut buf = vec![0u8; IO_READ_SIZE];
977 loop {
978 let n = file
979 .read(&mut buf)
980 .map_err(|e| IsccError::InvalidInput(format!("Cannot read file: {e}")))?;
981 if n == 0 {
982 break;
983 }
984 data_hasher.update(&buf[..n]);
985 instance_hasher.update(&buf[..n]);
986 }
987
988 let data_result = data_hasher.finalize(bits)?;
989 let instance_result = instance_hasher.finalize(bits)?;
990
991 let iscc_result = gen_iscc_code_v0(&[&data_result.iscc, &instance_result.iscc], wide)?;
992
993 Ok(SumCodeResult {
994 iscc: iscc_result.iscc,
995 datahash: instance_result.datahash,
996 filesize: instance_result.filesize,
997 })
998}
999
1000#[cfg(test)]
1001mod tests {
1002 use super::*;
1003
#[test]
fn test_gen_meta_code_v0_title_only() {
    // A name alone yields a code without description or meta.
    let title = "Die Unendliche Geschichte";
    let code = gen_meta_code_v0(title, None, None, 64).unwrap();
    assert_eq!(code.iscc, "ISCC:AAAZXZ6OU74YAZIM");
    assert_eq!(code.name, title);
    assert!(code.description.is_none());
    assert!(code.meta.is_none());
}
1012
#[test]
fn test_gen_meta_code_v0_title_description() {
    // Name plus description: the description is echoed back and meta stays absent.
    let title = "Die Unendliche Geschichte";
    let blurb = "Von Michael Ende";
    let code = gen_meta_code_v0(title, Some(blurb), None, 64).unwrap();
    assert_eq!(code.iscc, "ISCC:AAAZXZ6OU4E45RB5");
    assert_eq!(code.name, title);
    assert_eq!(code.description.as_deref(), Some(blurb));
    assert!(code.meta.is_none());
}
1027
#[test]
fn test_gen_meta_code_v0_json_meta() {
    // JSON meta is returned as an `application/json` Data-URL.
    let code = gen_meta_code_v0("Hello", None, Some(r#"{"some":"object"}"#), 64).unwrap();
    assert_eq!(code.iscc, "ISCC:AAAWKLHFXN63LHL2");
    assert!(matches!(
        code.meta.as_deref(),
        Some(url) if url.starts_with("data:application/json;base64,")
    ));
}
1040
#[test]
fn test_gen_meta_code_v0_data_url_meta() {
    // A Data-URL passed as meta is returned unchanged.
    let data_url = "data:application/json;charset=utf-8;base64,eyJzb21lIjogIm9iamVjdCJ9";
    let code = gen_meta_code_v0("Hello", None, Some(data_url), 64).unwrap();
    assert_eq!(code.iscc, "ISCC:AAAWKLHFXN43ICP2");
    assert_eq!(code.meta.as_deref(), Some(data_url));
}
1057
#[test]
fn test_gen_meta_code_v0_jcs_float_canonicalization() {
    // `1.0` must be canonicalized to `1` before hashing.
    let expected_meta = "data:application/json;base64,eyJ2YWx1ZSI6MX0=";
    let expected_metahash =
        "1e2010b291d392b6999ffe4aa4661fb343fc371fca3bfb5bb4e8d8226fdf85743232";

    let code = gen_meta_code_v0("Test", None, Some(r#"{"value":1.0}"#), 64).unwrap();

    assert_eq!(
        code.iscc, "ISCC:AAAX4GX3RZH2I6QZ",
        "ISCC mismatch: parse_meta_json must use RFC 8785 (JCS) canonicalization"
    );
    assert_eq!(
        code.meta.as_deref(),
        Some(expected_meta),
        "meta Data-URL mismatch: JCS should serialize 1.0 as 1"
    );
    assert_eq!(
        code.metahash, expected_metahash,
        "metahash mismatch: canonical bytes differ between JCS and serde_json"
    );
}
1084
#[test]
fn test_gen_meta_code_v0_jcs_large_float_canonicalization() {
    // `1e20` must be expanded to its plain integer form before hashing.
    let expected_meta =
        "data:application/json;base64,eyJ2YWx1ZSI6MTAwMDAwMDAwMDAwMDAwMDAwMDAwfQ==";
    let expected_metahash =
        "1e201ff83c1822c348717658a0b4713739646da7c59832691b337a457416ddd1c73d";

    let code = gen_meta_code_v0("Test", None, Some(r#"{"value":1e20}"#), 64).unwrap();

    assert_eq!(
        code.iscc, "ISCC:AAAX4GX3R32YH5P7",
        "ISCC mismatch: JCS should expand 1e20 to 100000000000000000000"
    );
    assert_eq!(
        code.meta.as_deref(),
        Some(expected_meta),
        "meta Data-URL mismatch: JCS should expand large float to integer form"
    );
    assert_eq!(
        code.metahash, expected_metahash,
        "metahash mismatch: canonical bytes differ for large float"
    );
}
1110
#[test]
fn test_gen_meta_code_v0_invalid_json() {
    // Meta that is neither a Data-URL nor JSON is rejected.
    let outcome = gen_meta_code_v0("test", None, Some("not json"), 64);
    assert!(matches!(outcome, Err(IsccError::InvalidInput(_))));
}
1118
#[test]
fn test_gen_meta_code_v0_invalid_data_url() {
    // A Data-URL without a comma separator is rejected.
    let outcome = gen_meta_code_v0("test", None, Some("data:no-comma-here"), 64);
    assert!(matches!(outcome, Err(IsccError::InvalidInput(_))));
}
1126
#[test]
fn test_gen_meta_code_v0_conformance() {
    // Runs every `gen_meta_code_v0` vector from the shared conformance data file.
    let vectors: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = vectors["gen_meta_code_v0"].as_object().unwrap();

    let mut tested = 0;

    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let outputs = &tc["outputs"];

        let input_name = inputs[0].as_str().unwrap();
        let input_desc = inputs[1].as_str().unwrap();
        let meta_val = &inputs[2];
        let bits = inputs[3].as_u64().unwrap() as u32;

        let expected_iscc = outputs["iscc"].as_str().unwrap();
        let expected_metahash = outputs["metahash"].as_str().unwrap();

        // Meta is given either as null, as a ready-made string, or as a JSON object that
        // must be serialized first.
        let meta_arg: Option<String> = match meta_val {
            serde_json::Value::Null => None,
            serde_json::Value::String(s) => Some(s.clone()),
            serde_json::Value::Object(_) => Some(serde_json::to_string(meta_val).unwrap()),
            other => panic!("unexpected meta type in {tc_name}: {other:?}"),
        };

        // An empty description in the vectors means "no description".
        let desc = Some(input_desc).filter(|text| !text.is_empty());

        let result = match gen_meta_code_v0(input_name, desc, meta_arg.as_deref(), bits) {
            Ok(result) => result,
            Err(e) => panic!("gen_meta_code_v0 failed for {tc_name}: {e}"),
        };

        assert_eq!(
            result.iscc, expected_iscc,
            "ISCC mismatch in test case {tc_name}"
        );
        assert_eq!(
            result.metahash, expected_metahash,
            "metahash mismatch in test case {tc_name}"
        );

        if let Some(expected_name) = outputs.get("name") {
            let expected_name = expected_name.as_str().unwrap();
            assert_eq!(
                result.name, expected_name,
                "name mismatch in test case {tc_name}"
            );
        }

        if let Some(expected_desc) = outputs.get("description") {
            let expected_desc = expected_desc.as_str().unwrap();
            assert_eq!(
                result.description.as_deref(),
                Some(expected_desc),
                "description mismatch in test case {tc_name}"
            );
        }

        match meta_arg {
            Some(_) => assert!(
                result.meta.is_some(),
                "meta should be present in test case {tc_name}"
            ),
            None => assert!(
                result.meta.is_none(),
                "meta should be absent in test case {tc_name}"
            ),
        }

        tested += 1;
    }

    assert_eq!(tested, 16, "expected 16 conformance tests to run");
}
1211
#[test]
fn test_gen_text_code_v0_empty() {
    // Empty text still produces a code and reports zero characters.
    let code = gen_text_code_v0("", 64).unwrap();
    assert_eq!(code.characters, 0);
    assert_eq!(code.iscc, "ISCC:EAASL4F2WZY7KBXB");
}
1218
#[test]
fn test_gen_text_code_v0_hello_world() {
    // The character count refers to the collapsed text, not the raw input.
    let code = gen_text_code_v0("Hello World", 64).unwrap();
    assert_eq!(code.characters, 10);
    assert_eq!(code.iscc, "ISCC:EAASKDNZNYGUUF5A");
}
1225
#[test]
fn test_gen_text_code_v0_conformance() {
    // Runs every `gen_text_code_v0` vector from the shared conformance data file.
    let vectors: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = vectors["gen_text_code_v0"].as_object().unwrap();

    let mut tested = 0;

    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let outputs = &tc["outputs"];

        let input_text = inputs[0].as_str().unwrap();
        let bits = inputs[1].as_u64().unwrap() as u32;

        let expected_iscc = outputs["iscc"].as_str().unwrap();
        let expected_chars = outputs["characters"].as_u64().unwrap() as usize;

        let result = match gen_text_code_v0(input_text, bits) {
            Ok(result) => result,
            Err(e) => panic!("gen_text_code_v0 failed for {tc_name}: {e}"),
        };

        assert_eq!(
            result.iscc, expected_iscc,
            "ISCC mismatch in test case {tc_name}"
        );
        assert_eq!(
            result.characters, expected_chars,
            "character count mismatch in test case {tc_name}"
        );

        tested += 1;
    }

    assert_eq!(tested, 5, "expected 5 conformance tests to run");
}
1262
#[test]
fn test_gen_image_code_v0_all_black() {
    // A uniform black image produces an all-zero digest.
    let black = [0u8; 1024];
    let code = gen_image_code_v0(&black, 64).unwrap();
    assert_eq!(code.iscc, "ISCC:EEAQAAAAAAAAAAAA");
}
1269
#[test]
fn test_gen_image_code_v0_all_white() {
    // A uniform white image at 128 bits.
    let white = [255u8; 1024];
    let code = gen_image_code_v0(&white, 128).unwrap();
    assert_eq!(code.iscc, "ISCC:EEBYAAAAAAAAAAAAAAAAAAAAAAAAA");
}
1276
#[test]
fn test_gen_image_code_v0_invalid_pixel_count() {
    // Anything other than 1024 pixels is rejected.
    let too_few = [0u8; 100];
    let outcome = gen_image_code_v0(&too_few, 64);
    assert!(outcome.is_err());
}
1281
#[test]
fn test_gen_image_code_v0_conformance() {
    // Runs every `gen_image_code_v0` vector from the shared conformance data file.
    let vectors: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = vectors["gen_image_code_v0"].as_object().unwrap();

    let mut tested = 0;

    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let pixels_json = inputs[0].as_array().unwrap();
        let bits = inputs[1].as_u64().unwrap() as u32;
        let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();

        let mut pixels: Vec<u8> = Vec::with_capacity(pixels_json.len());
        for value in pixels_json {
            pixels.push(value.as_u64().unwrap() as u8);
        }

        let result = match gen_image_code_v0(&pixels, bits) {
            Ok(result) => result,
            Err(e) => panic!("gen_image_code_v0 failed for {tc_name}: {e}"),
        };

        assert_eq!(
            result.iscc, expected_iscc,
            "ISCC mismatch in test case {tc_name}"
        );

        tested += 1;
    }

    assert_eq!(tested, 3, "expected 3 conformance tests to run");
}
1314
#[test]
fn test_gen_audio_code_v0_empty() {
    // An empty input vector produces an all-zero digest.
    let no_values: [i32; 0] = [];
    let code = gen_audio_code_v0(&no_values, 64).unwrap();
    assert_eq!(code.iscc, "ISCC:EIAQAAAAAAAAAAAA");
}
1320
#[test]
fn test_gen_audio_code_v0_single() {
    // A single value at 128 bits.
    let values = [1];
    let code = gen_audio_code_v0(&values, 128).unwrap();
    assert_eq!(code.iscc, "ISCC:EIBQAAAAAEAAAAABAAAAAAAAAAAAA");
}
1326
#[test]
fn test_gen_audio_code_v0_negative() {
    // Negative values are supported; checked at the full 256 bits.
    let values = [-1, 0, 1];
    let expected = "ISCC:EIDQAAAAAH777777AAAAAAAAAAAACAAAAAAP777774AAAAAAAAAAAAI";
    let code = gen_audio_code_v0(&values, 256).unwrap();
    assert_eq!(code.iscc, expected);
}
1335
#[test]
fn test_gen_audio_code_v0_conformance() {
    // Runs every `gen_audio_code_v0` vector from the shared conformance data file.
    let vectors: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = vectors["gen_audio_code_v0"].as_object().unwrap();

    let mut tested = 0;

    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let cv_json = inputs[0].as_array().unwrap();
        let bits = inputs[1].as_u64().unwrap() as u32;
        let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();

        let mut cv: Vec<i32> = Vec::with_capacity(cv_json.len());
        for value in cv_json {
            cv.push(value.as_i64().unwrap() as i32);
        }

        let result = match gen_audio_code_v0(&cv, bits) {
            Ok(result) => result,
            Err(e) => panic!("gen_audio_code_v0 failed for {tc_name}: {e}"),
        };

        assert_eq!(
            result.iscc, expected_iscc,
            "ISCC mismatch in test case {tc_name}"
        );

        tested += 1;
    }

    assert_eq!(tested, 5, "expected 5 conformance tests to run");
}
1365
/// Splitting four elements into four parts yields four single-element slices.
#[test]
fn test_array_split_even() {
    let items = vec![1, 2, 3, 4];
    let expected: Vec<&[i32]> = vec![&[1][..], &[2][..], &[3][..], &[4][..]];
    assert_eq!(array_split(&items, 4), expected);
}
1372
/// Splitting five elements into three parts puts the extra elements in the
/// leading parts: sizes 2, 2 and 1.
#[test]
fn test_array_split_remainder() {
    let items = vec![1, 2, 3, 4, 5];
    let expected: Vec<&[i32]> = vec![&[1, 2][..], &[3, 4][..], &[5][..]];
    assert_eq!(array_split(&items, 3), expected);
}
1379
/// Asking for more parts than there are elements pads the result with empty
/// slices.
#[test]
fn test_array_split_more_parts_than_elements() {
    let items = vec![1, 2];
    let nothing: &[i32] = &[];
    let expected: Vec<&[i32]> = vec![&[1][..], &[2][..], nothing, nothing];
    assert_eq!(array_split(&items, 4), expected);
}
1389
/// Splitting an empty input into three parts yields three empty slices.
#[test]
fn test_array_split_empty() {
    let items: Vec<i32> = Vec::new();
    let nothing: &[i32] = &[];
    let expected: Vec<&[i32]> = vec![nothing, nothing, nothing];
    assert_eq!(array_split(&items, 3), expected);
}
1399
/// An empty frame list must be rejected with `IsccError::InvalidInput`.
#[test]
fn test_gen_video_code_v0_empty_frames() {
    let frames: Vec<Vec<i32>> = Vec::new();
    let outcome = gen_video_code_v0(&frames, 64);
    assert!(matches!(outcome, Err(IsccError::InvalidInput(_))));
}
1408
/// Runs every `gen_video_code_v0` vector from `../tests/data.json` and compares
/// the generated ISCC string with the expected one.
///
/// Each case supplies `inputs[0]` (an array of per-frame integer arrays) and
/// `inputs[1]` (the bit length); exactly 3 cases are expected to run.
#[test]
fn test_gen_video_code_v0_conformance() {
    let data: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = data["gen_video_code_v0"].as_object().unwrap();

    let mut tested = 0;

    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let frames_json = inputs[0].as_array().unwrap();
        let bits = inputs[1].as_u64().unwrap() as u32;
        let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();

        // Convert the nested JSON arrays into one `Vec<i32>` per frame.
        let mut frame_sigs: Vec<Vec<i32>> = Vec::with_capacity(frames_json.len());
        for frame in frames_json {
            let signature: Vec<i32> = frame
                .as_array()
                .unwrap()
                .iter()
                .map(|item| item.as_i64().unwrap() as i32)
                .collect();
            frame_sigs.push(signature);
        }

        let result = match gen_video_code_v0(&frame_sigs, bits) {
            Ok(code) => code,
            Err(e) => panic!("gen_video_code_v0 failed for {tc_name}: {e}"),
        };
        assert_eq!(
            result.iscc, expected_iscc,
            "ISCC mismatch in test case {tc_name}"
        );

        tested += 1;
    }

    assert_eq!(tested, 3, "expected 3 conformance tests to run");
}
1448
/// Runs every `gen_mixed_code_v0` vector from `../tests/data.json`, checking
/// both the generated ISCC string and the returned `parts` list.
///
/// Each case supplies `inputs[0]` (an array of code strings) and `inputs[1]`
/// (the bit length); exactly 2 cases are expected to run.
#[test]
fn test_gen_mixed_code_v0_conformance() {
    let data: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = data["gen_mixed_code_v0"].as_object().unwrap();

    let mut tested = 0;

    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let codes_json = inputs[0].as_array().unwrap();
        let bits = inputs[1].as_u64().unwrap() as u32;

        let outputs = &tc["outputs"];
        let expected_iscc = outputs["iscc"].as_str().unwrap();
        let expected_parts: Vec<&str> = outputs["parts"]
            .as_array()
            .unwrap()
            .iter()
            .map(|part| part.as_str().unwrap())
            .collect();

        let codes: Vec<&str> = codes_json
            .iter()
            .map(|code| code.as_str().unwrap())
            .collect();

        let result = match gen_mixed_code_v0(&codes, bits) {
            Ok(code) => code,
            Err(e) => panic!("gen_mixed_code_v0 failed for {tc_name}: {e}"),
        };
        assert_eq!(
            result.iscc, expected_iscc,
            "ISCC mismatch in test case {tc_name}"
        );

        // Borrow the owned part strings so they compare against `&str` values.
        let result_parts: Vec<&str> = result.parts.iter().map(|part| part.as_str()).collect();
        assert_eq!(
            result_parts, expected_parts,
            "parts mismatch in test case {tc_name}"
        );

        tested += 1;
    }

    assert_eq!(tested, 2, "expected 2 conformance tests to run");
}
1491
/// A single input code must be rejected with `IsccError::InvalidInput`.
#[test]
fn test_gen_mixed_code_v0_too_few_codes() {
    let codes: [&str; 1] = ["EUA6GIKXN42IQV3S"];
    let outcome = gen_mixed_code_v0(&codes, 64);
    assert!(matches!(outcome, Err(IsccError::InvalidInput(_))));
}
1499
1500 fn make_content_code_raw(stype: codec::SubType, bit_length: u32) -> Vec<u8> {
1502 let nbytes = (bit_length / 8) as usize;
1503 let body: Vec<u8> = (0..nbytes).map(|i| (i & 0xFF) as u8).collect();
1504 let base32 = codec::encode_component(
1505 codec::MainType::Content,
1506 stype,
1507 codec::Version::V0,
1508 bit_length,
1509 &body,
1510 )
1511 .unwrap();
1512 codec::decode_base32(&base32).unwrap()
1513 }
1514
/// A 32-bit code passed alongside a 64-bit request must be rejected with an
/// `InvalidInput` error whose message contains "too short".
#[test]
fn test_soft_hash_codes_v0_rejects_short_code() {
    let inputs = [
        make_content_code_raw(codec::SubType::None, 64),
        make_content_code_raw(codec::SubType::Image, 32),
    ];
    let result = soft_hash_codes_v0(&inputs, 64);
    let rejected_as_short =
        matches!(&result, Err(IsccError::InvalidInput(msg)) if msg.contains("too short"));
    assert!(
        rejected_as_short,
        "expected InvalidInput with 'too short', got {result:?}"
    );
}
1526
/// Two 64-bit codes are accepted when 64 bits are requested.
#[test]
fn test_soft_hash_codes_v0_accepts_exact_length() {
    let inputs = [
        make_content_code_raw(codec::SubType::None, 64),
        make_content_code_raw(codec::SubType::Image, 64),
    ];
    let result = soft_hash_codes_v0(&inputs, 64);
    assert!(result.is_ok(), "expected Ok, got {result:?}");
}
1535
/// Two 128-bit codes are accepted when only 64 bits are requested.
#[test]
fn test_soft_hash_codes_v0_accepts_longer_codes() {
    let inputs = [
        make_content_code_raw(codec::SubType::None, 128),
        make_content_code_raw(codec::SubType::Audio, 128),
    ];
    let result = soft_hash_codes_v0(&inputs, 64);
    assert!(result.is_ok(), "expected Ok, got {result:?}");
}
1544
/// Runs every `gen_data_code_v0` vector from `../tests/data.json` and compares
/// the generated ISCC string with the expected one.
///
/// `inputs[0]` is a string of the form `stream:<hex>` whose hex payload is the
/// input data; `inputs[1]` is the bit length. Exactly 4 cases are expected.
#[test]
fn test_gen_data_code_v0_conformance() {
    let data: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = data["gen_data_code_v0"].as_object().unwrap();

    let mut tested = 0;

    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let stream_str = inputs[0].as_str().unwrap();
        let bits = inputs[1].as_u64().unwrap() as u32;
        let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();

        let hex_data = match stream_str.strip_prefix("stream:") {
            Some(rest) => rest,
            None => panic!("expected 'stream:' prefix in test case {tc_name}"),
        };
        let input_bytes = match hex::decode(hex_data) {
            Ok(bytes) => bytes,
            Err(e) => panic!("invalid hex in test case {tc_name}: {e}"),
        };

        let result = match gen_data_code_v0(&input_bytes, bits) {
            Ok(code) => code,
            Err(e) => panic!("gen_data_code_v0 failed for {tc_name}: {e}"),
        };
        assert_eq!(
            result.iscc, expected_iscc,
            "ISCC mismatch in test case {tc_name}"
        );

        tested += 1;
    }

    assert_eq!(tested, 4, "expected 4 conformance tests to run");
}
1579
/// Pins the ISCC string, data hash and file size reported for empty input.
#[test]
fn test_gen_instance_code_v0_empty() {
    let expected_datahash =
        "1e20af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262";
    let code = gen_instance_code_v0(b"", 64).unwrap();
    assert_eq!(code.iscc, "ISCC:IAA26E2JXH27TING");
    assert_eq!(code.filesize, 0);
    assert_eq!(code.datahash, expected_datahash);
}
1590
/// Runs every `gen_instance_code_v0` vector from `../tests/data.json`.
///
/// `inputs[0]` is a string of the form `stream:<hex>` whose hex payload is the
/// input data; `inputs[1]` is the bit length. For each case the ISCC string is
/// compared, `datahash` and `filesize` are compared when the vector provides
/// them, and `filesize` is always checked against the input length.
///
/// The test also asserts that at least one vector ran, so an empty section
/// cannot pass vacuously.
#[test]
fn test_gen_instance_code_v0_conformance() {
    let json_str = include_str!("../tests/data.json");
    let data: serde_json::Value = serde_json::from_str(json_str).unwrap();
    let section = &data["gen_instance_code_v0"];
    let cases = section.as_object().unwrap();

    let mut tested = 0;

    for (name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let stream_str = inputs[0].as_str().unwrap();
        let bits = inputs[1].as_u64().unwrap() as u32;
        let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();

        let hex_data = stream_str
            .strip_prefix("stream:")
            .unwrap_or_else(|| panic!("expected 'stream:' prefix in test case {name}"));
        let input_bytes = hex::decode(hex_data)
            .unwrap_or_else(|e| panic!("invalid hex in test case {name}: {e}"));

        let result = gen_instance_code_v0(&input_bytes, bits)
            .unwrap_or_else(|e| panic!("gen_instance_code_v0 failed for {name}: {e}"));
        assert_eq!(
            result.iscc, expected_iscc,
            "ISCC mismatch in test case {name}"
        );

        // `datahash` is optional in the vectors; compare only when present.
        if let Some(expected_datahash) = tc["outputs"].get("datahash") {
            let expected_datahash = expected_datahash.as_str().unwrap();
            assert_eq!(
                result.datahash, expected_datahash,
                "datahash mismatch in test case {name}"
            );
        }

        // `filesize` is optional in the vectors; compare only when present.
        if let Some(expected_filesize) = tc["outputs"].get("filesize") {
            let expected_filesize = expected_filesize.as_u64().unwrap();
            assert_eq!(
                result.filesize, expected_filesize,
                "filesize mismatch in test case {name}"
            );
        }

        // Independent of the vector: the reported size must equal the input length.
        assert_eq!(
            result.filesize,
            input_bytes.len() as u64,
            "filesize should match input length in test case {name}"
        );

        tested += 1;
    }

    // Without this, a section with no entries would make the loop a no-op and
    // the test would pass without checking anything.
    assert!(tested > 0, "expected at least one conformance test to run");
}
1644
/// Runs every `gen_iscc_code_v0` vector from `../tests/data.json` with the
/// second argument set to `false`, comparing the generated ISCC string with the
/// expected one.
///
/// `inputs[0]` is an array of code strings; exactly 5 cases are expected.
#[test]
fn test_gen_iscc_code_v0_conformance() {
    let data: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data.json")).unwrap();
    let cases = data["gen_iscc_code_v0"].as_object().unwrap();

    let mut tested = 0;

    for (tc_name, tc) in cases {
        let inputs = tc["inputs"].as_array().unwrap();
        let codes: Vec<&str> = inputs[0]
            .as_array()
            .unwrap()
            .iter()
            .map(|code| code.as_str().unwrap())
            .collect();
        let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();

        let result = match gen_iscc_code_v0(&codes, false) {
            Ok(code) => code,
            Err(e) => panic!("gen_iscc_code_v0 failed for {tc_name}: {e}"),
        };
        assert_eq!(
            result.iscc, expected_iscc,
            "ISCC mismatch in test case {tc_name}"
        );

        tested += 1;
    }

    assert_eq!(tested, 5, "expected 5 conformance tests to run");
}
1673
/// A single input code must be rejected with `IsccError::InvalidInput`.
#[test]
fn test_gen_iscc_code_v0_too_few_codes() {
    let codes: [&str; 1] = ["AAAWKLHFPV6OPKDG"];
    let outcome = gen_iscc_code_v0(&codes, false);
    assert!(matches!(outcome, Err(IsccError::InvalidInput(_))));
}
1681
/// Passing the same code twice (so the pair named in the test title as missing
/// an instance code) must be rejected with `IsccError::InvalidInput`.
#[test]
fn test_gen_iscc_code_v0_missing_instance() {
    let codes: [&str; 2] = ["AAAWKLHFPV6OPKDG", "AAAWKLHFPV6OPKDG"];
    let outcome = gen_iscc_code_v0(&codes, false);
    assert!(matches!(outcome, Err(IsccError::InvalidInput(_))));
}
1690
/// An input list whose first code is a shortened string must be rejected with
/// `IsccError::InvalidInput`.
#[test]
fn test_gen_iscc_code_v0_short_code() {
    let codes: [&str; 2] = ["AAAWKLHFPV6", "AAAWKLHFPV6OPKDG"];
    let outcome = gen_iscc_code_v0(&codes, false);
    assert!(matches!(outcome, Err(IsccError::InvalidInput(_))));
}
1699
/// A Data-URL with an empty base64 payload is still treated as `meta` input:
/// the URL is echoed back in `meta` and `metahash` equals
/// `utils::multi_hash_blake3` of an empty byte slice.
#[test]
fn test_gen_meta_code_empty_data_url_enters_meta_branch() {
    let empty_payload_url = "data:application/json;base64,";
    let code = gen_meta_code_v0("Test", None, Some(empty_payload_url), 64).unwrap();

    assert_eq!(code.name, "Test");

    assert_eq!(
        code.meta,
        Some(empty_payload_url.to_string()),
        "empty Data-URL payload should still enter meta branch"
    );

    let hash_of_nothing = utils::multi_hash_blake3(&[]);
    assert_eq!(
        code.metahash, hash_of_nothing,
        "metahash should be BLAKE3 of empty bytes"
    );
}
1728
/// `soft_hash_meta_v0_with_bytes` with an empty byte slice must return the same
/// digest as `soft_hash_meta_v0` with no extra text.
#[test]
fn test_soft_hash_meta_v0_with_bytes_empty_equals_name_only() {
    let title = "test";
    let digest_name_only = soft_hash_meta_v0(title, None);
    let digest_empty_bytes = soft_hash_meta_v0_with_bytes(title, &[]);
    assert_eq!(
        digest_name_only, digest_empty_bytes,
        "empty bytes should produce same digest as name-only (no interleaving)"
    );
}
1743
/// Pins `META_TRIM_NAME` to 128.
#[test]
fn test_meta_trim_name_value() {
    let expected: usize = 128;
    assert_eq!(META_TRIM_NAME, expected);
}
1750
/// Pins `META_TRIM_DESCRIPTION` to 4096.
#[test]
fn test_meta_trim_description_value() {
    let expected: usize = 4096;
    assert_eq!(META_TRIM_DESCRIPTION, expected);
}
1755
/// Pins `IO_READ_SIZE` to 4_194_304.
#[test]
fn test_io_read_size_value() {
    let expected: usize = 4_194_304;
    assert_eq!(IO_READ_SIZE, expected);
}
1760
/// Pins `TEXT_NGRAM_SIZE` to 13.
#[test]
fn test_text_ngram_size_value() {
    let expected: usize = 13;
    assert_eq!(TEXT_NGRAM_SIZE, expected);
}
1765
/// The integer-argument `encode_component` called with `(3, 0, 0)` must return
/// the same string as `codec::encode_component` called with
/// `MainType::Data`, `SubType::None`, `Version::V0` for the same digest.
#[test]
fn test_encode_component_matches_codec() {
    let digest = [0xABu8; 8];
    let via_integers = encode_component(3, 0, 0, 64, &digest).unwrap();
    let via_enums = codec::encode_component(
        codec::MainType::Data,
        codec::SubType::None,
        codec::Version::V0,
        64,
        &digest,
    )
    .unwrap();
    assert_eq!(via_integers, via_enums);
}
1783
/// Encoding a 32-byte digest at 64 bits succeeds and yields a non-empty string.
#[test]
fn test_encode_component_round_trip() {
    let digest = [0x42u8; 32];
    let encoded = encode_component(0, 0, 0, 64, &digest).unwrap();
    assert!(!encoded.is_empty());
}
1792
/// Main type value 5 must be rejected by `encode_component`.
#[test]
fn test_encode_component_rejects_iscc() {
    let digest = [0u8; 8];
    let outcome = encode_component(5, 0, 0, 64, &digest);
    assert!(outcome.is_err());
}
1799
/// A 4-byte digest is too short for a 64-bit component; the error text must
/// report both the digest length and the required byte count.
#[test]
fn test_encode_component_rejects_short_digest() {
    let short_digest = [0u8; 4];
    let outcome = encode_component(0, 0, 0, 64, &short_digest);
    assert!(outcome.is_err());
    let err = outcome.unwrap_err().to_string();
    let mentions_lengths = err.contains("digest length 4 < bit_length/8 (8)");
    assert!(mentions_lengths, "unexpected error: {err}");
}
1811
/// Main type value 99 must be rejected by `encode_component`.
#[test]
fn test_encode_component_rejects_invalid_mtype() {
    let digest = [0u8; 8];
    let outcome = encode_component(99, 0, 0, 64, &digest);
    assert!(outcome.is_err());
}
1818
/// Sub type value 99 must be rejected by `encode_component`.
#[test]
fn test_encode_component_rejects_invalid_stype() {
    let digest = [0u8; 8];
    let outcome = encode_component(0, 99, 0, 64, &digest);
    assert!(outcome.is_err());
}
1825
/// Version value 99 must be rejected by `encode_component`.
#[test]
fn test_encode_component_rejects_invalid_version() {
    let digest = [0u8; 8];
    let outcome = encode_component(0, 0, 99, 64, &digest);
    assert!(outcome.is_err());
}
1832
/// Encodes a 64-bit component with main type 0 and checks that `iscc_decode`
/// returns the same header fields (with length index 1) and the same digest.
#[test]
fn test_iscc_decode_round_trip_meta() {
    let digest = [0xaa_u8; 8];
    let encoded = encode_component(0, 0, 0, 64, &digest).unwrap();
    let decoded = iscc_decode(&encoded).unwrap();
    assert_eq!(decoded.0, 0, "MainType::Meta");
    assert_eq!(decoded.1, 0, "SubType::None");
    assert_eq!(decoded.2, 0, "Version::V0");
    assert_eq!(decoded.3, 1, "length_index");
    assert_eq!(decoded.4, digest.to_vec());
}
1848
/// Encodes a 64-bit component with main type 2 and checks that `iscc_decode`
/// returns the same main type, sub type, version and digest. The length index
/// is not checked here.
#[test]
fn test_iscc_decode_round_trip_content() {
    let digest = [0xbb_u8; 8];
    let encoded = encode_component(2, 0, 0, 64, &digest).unwrap();
    let decoded = iscc_decode(&encoded).unwrap();
    assert_eq!(decoded.0, 2, "MainType::Content");
    assert_eq!(decoded.1, 0, "SubType::TEXT");
    assert_eq!(decoded.2, 0, "Version::V0");
    assert_eq!(decoded.4, digest.to_vec());
}
1860
/// Encodes a 64-bit component with main type 3 and checks that `iscc_decode`
/// returns that main type and the original digest.
#[test]
fn test_iscc_decode_round_trip_data() {
    let digest = [0xcc_u8; 8];
    let encoded = encode_component(3, 0, 0, 64, &digest).unwrap();
    let decoded = iscc_decode(&encoded).unwrap();
    assert_eq!(decoded.0, 3, "MainType::Data");
    assert_eq!(decoded.4, digest.to_vec());
}
1870
/// Encodes a 64-bit component with main type 4 and checks that `iscc_decode`
/// returns that main type and the original digest.
#[test]
fn test_iscc_decode_round_trip_instance() {
    let digest = [0xdd_u8; 8];
    let encoded = encode_component(4, 0, 0, 64, &digest).unwrap();
    let decoded = iscc_decode(&encoded).unwrap();
    assert_eq!(decoded.0, 4, "MainType::Instance");
    assert_eq!(decoded.4, digest.to_vec());
}
1880
/// `iscc_decode` accepts input that carries a leading `ISCC:` prefix and
/// returns the same fields as for the bare component.
#[test]
fn test_iscc_decode_with_prefix() {
    let digest = [0xaa_u8; 8];
    let encoded = encode_component(0, 0, 0, 64, &digest).unwrap();
    let with_prefix = format!("ISCC:{encoded}");
    let decoded = iscc_decode(&with_prefix).unwrap();
    assert_eq!(decoded.0, 0);
    assert_eq!(decoded.1, 0);
    assert_eq!(decoded.2, 0);
    assert_eq!(decoded.3, 1);
    assert_eq!(decoded.4, digest.to_vec());
}
1894
/// `iscc_decode` accepts input with dashes inserted after the 4th and 8th
/// characters and returns the same fields as for the undashed component.
#[test]
fn test_iscc_decode_with_dashes() {
    let digest = [0xaa_u8; 8];
    let encoded = encode_component(0, 0, 0, 64, &digest).unwrap();
    let (head, rest) = encoded.split_at(4);
    let (middle, tail) = rest.split_at(4);
    let with_dashes = format!("{}-{}-{}", head, middle, tail);
    let decoded = iscc_decode(&with_dashes).unwrap();
    assert_eq!(decoded.0, 0);
    assert_eq!(decoded.1, 0);
    assert_eq!(decoded.2, 0);
    assert_eq!(decoded.3, 1);
    assert_eq!(decoded.4, digest.to_vec());
}
1909
/// Input that is not valid base32 must fail with an error whose message
/// mentions "base32".
#[test]
fn test_iscc_decode_invalid_base32() {
    let outcome = iscc_decode("!!!INVALID!!!");
    assert!(outcome.is_err());
    let err = outcome.unwrap_err().to_string();
    let mentions_base32 = err.contains("base32");
    assert!(mentions_base32, "expected base32 error: {err}");
}
1918
/// Decodes the fixed string `ISCC:AAAZXZ6OU74YAZIM` and checks its header
/// fields (main type 0, length index 1) and its 8-byte digest length.
#[test]
fn test_iscc_decode_known_meta_code() {
    let decoded = iscc_decode("ISCC:AAAZXZ6OU74YAZIM").unwrap();
    assert_eq!(decoded.0, 0, "MainType::Meta");
    assert_eq!(decoded.1, 0, "SubType::None");
    assert_eq!(decoded.2, 0, "Version::V0");
    assert_eq!(decoded.3, 1, "length_index for 64-bit");
    assert_eq!(decoded.4.len(), 8, "64-bit = 8 bytes");
}
1930
/// Decodes the fixed string `ISCC:IAA26E2JXH27TING` and checks its header
/// fields (main type 4, length index 1) and its 8-byte digest length.
#[test]
fn test_iscc_decode_known_instance_code() {
    let decoded = iscc_decode("ISCC:IAA26E2JXH27TING").unwrap();
    assert_eq!(decoded.0, 4, "MainType::Instance");
    assert_eq!(decoded.1, 0, "SubType::None");
    assert_eq!(decoded.2, 0, "Version::V0");
    assert_eq!(decoded.3, 1, "length_index for 64-bit");
    assert_eq!(decoded.4.len(), 8, "64-bit = 8 bytes");
}
1942
/// Decodes the fixed string `ISCC:GAAXL2XYM5BQIAZ3` and checks main type 3,
/// sub type 0, version 0 and an 8-byte digest. The length index is not checked.
#[test]
fn test_iscc_decode_known_data_code() {
    let decoded = iscc_decode("ISCC:GAAXL2XYM5BQIAZ3").unwrap();
    assert_eq!(decoded.0, 3, "MainType::Data");
    assert_eq!(decoded.1, 0, "SubType::None");
    assert_eq!(decoded.2, 0, "Version::V0");
    assert_eq!(decoded.4.len(), 8, "64-bit = 8 bytes");
}
1953
/// Encodes a 64-bit component and checks the whole tuple returned by
/// `iscc_decode` in a single comparison.
#[test]
fn test_iscc_decode_verification_round_trip() {
    let digest = [0xaa_u8; 8];
    let encoded = encode_component(0, 0, 0, 64, &digest).unwrap();
    let expected = (0, 0, 0, 1, vec![0xaa; 8]);
    let decoded = iscc_decode(&encoded).unwrap();
    assert_eq!(decoded, expected);
}
1963
/// A 256-bit component cut down to its first 6 characters must fail to decode.
#[test]
fn test_iscc_decode_truncated_input() {
    let digest = [0xff_u8; 32];
    let encoded = encode_component(0, 0, 0, 256, &digest).unwrap();
    let (truncated, _dropped) = encoded.split_at(6);
    let outcome = iscc_decode(truncated);
    assert!(outcome.is_err(), "should fail on truncated input");
}
1975
/// A plain JSON object is converted to a Data-URL with the
/// `application/json` media type and base64 encoding.
#[test]
fn test_json_to_data_url_basic() {
    let url = json_to_data_url(r#"{"key": "value"}"#).unwrap();
    let has_json_prefix = url.starts_with("data:application/json;base64,");
    assert!(
        has_json_prefix,
        "expected application/json prefix, got: {url}"
    );
}
1987
/// A JSON object with an `@context` key is converted to a Data-URL with the
/// `application/ld+json` media type.
#[test]
fn test_json_to_data_url_ld_json() {
    let url = json_to_data_url(r#"{"@context": "https://schema.org"}"#).unwrap();
    let has_ld_json_prefix = url.starts_with("data:application/ld+json;base64,");
    assert!(
        has_ld_json_prefix,
        "expected application/ld+json prefix, got: {url}"
    );
}
1997
/// The base64 payload of the Data-URL decodes to JSON whose object keys are
/// sorted (`{"b":1,"a":2}` becomes `{"a":2,"b":1}`).
#[test]
fn test_json_to_data_url_jcs_ordering() {
    let url = json_to_data_url(r#"{"b":1,"a":2}"#).unwrap();
    // Everything after the first comma is the base64 payload.
    let (_header, payload) = url.split_once(',').unwrap();
    let raw = data_encoding::BASE64.decode(payload.as_bytes()).unwrap();
    let canonical = std::str::from_utf8(&raw).unwrap();
    assert_eq!(canonical, r#"{"a":2,"b":1}"#, "JCS should sort keys");
}
2008
/// The bytes recovered by `decode_data_url` from a generated Data-URL parse to
/// the same JSON value as the original input text.
#[test]
fn test_json_to_data_url_round_trip() {
    let input = r#"{"hello": "world", "num": 42}"#;
    let original: serde_json::Value = serde_json::from_str(input).unwrap();

    let url = json_to_data_url(input).unwrap();
    let payload = decode_data_url(&url).unwrap();
    let canonical: serde_json::Value =
        serde_json::from_slice(&payload).expect("decoded bytes should be valid JSON");

    assert_eq!(canonical, original, "round-trip preserves JSON semantics");
}
2022
/// Text that is not JSON must be rejected with an error whose message contains
/// "invalid JSON".
#[test]
fn test_json_to_data_url_invalid_json() {
    let outcome = json_to_data_url("not json");
    assert!(outcome.is_err(), "should reject invalid JSON");
    let err = outcome.unwrap_err().to_string();
    let mentions_invalid_json = err.contains("invalid JSON");
    assert!(
        mentions_invalid_json,
        "expected 'invalid JSON' in error: {err}"
    );
}
2034
/// For the input `{"some": "object"}` the Data-URL uses the `application/json`
/// media type and its payload decodes to the whitespace-free form
/// `{"some":"object"}`.
#[test]
fn test_json_to_data_url_conformance_0016() {
    let url = json_to_data_url(r#"{"some": "object"}"#).unwrap();
    let has_json_prefix = url.starts_with("data:application/json;base64,");
    assert!(has_json_prefix, "expected application/json prefix");

    // Everything after the first comma is the base64 payload.
    let (_header, payload) = url.split_once(',').unwrap();
    let raw = data_encoding::BASE64.decode(payload.as_bytes()).unwrap();
    let canonical = std::str::from_utf8(&raw).unwrap();
    assert_eq!(
        canonical, r#"{"some":"object"}"#,
        "JCS removes whitespace from JSON"
    );
}
2064
/// Pins `META_TRIM_META` to 128_000.
#[test]
fn test_meta_trim_meta_value() {
    let expected: usize = 128_000;
    assert_eq!(META_TRIM_META, expected);
}
2069
/// A JSON `meta` string of exactly 128_000 bytes is accepted.
///
/// The wrapper `{"x":""}` is 8 bytes long, so the padding is `128_000 - 8`
/// characters.
#[test]
fn test_gen_meta_code_v0_meta_at_limit() {
    let padding = "a".repeat(128_000 - 8);
    let json_str = format!(r#"{{"x":"{padding}"}}"#);
    let outcome = gen_meta_code_v0("test", None, Some(json_str.as_str()), 64);
    assert!(
        outcome.is_ok(),
        "payload at exactly META_TRIM_META should succeed"
    );
}
2083
/// A JSON `meta` string one byte longer than 128_000 bytes is rejected with an
/// `InvalidInput` error whose message contains "size limit".
#[test]
fn test_gen_meta_code_v0_meta_over_limit() {
    let padding = "a".repeat(128_000 - 8 + 1);
    let json_str = format!(r#"{{"x":"{padding}"}}"#);
    let outcome = gen_meta_code_v0("test", None, Some(json_str.as_str()), 64);
    let rejected_for_size =
        matches!(&outcome, Err(IsccError::InvalidInput(msg)) if msg.contains("size limit"));
    assert!(
        rejected_for_size,
        "payload exceeding META_TRIM_META should return InvalidInput"
    );
}
2095
/// A Data-URL whose base64 part is longer than `META_TRIM_META * 4 / 3 + 256`
/// characters is rejected with an `InvalidInput` error whose message contains
/// "size limit".
#[test]
fn test_gen_meta_code_v0_data_url_pre_decode_reject() {
    let pre_decode_limit = META_TRIM_META * 4 / 3 + 256;
    let padding = "A".repeat(pre_decode_limit + 1);
    let data_url = format!("data:application/octet-stream;base64,{padding}");
    let outcome = gen_meta_code_v0("test", None, Some(data_url.as_str()), 64);
    let rejected_for_size =
        matches!(&outcome, Err(IsccError::InvalidInput(msg)) if msg.contains("size limit"));
    assert!(
        rejected_for_size,
        "oversized Data-URL should be rejected before decoding"
    );
}
2109
/// Writes `data` to a file in the system temp directory and returns its path.
///
/// The file name is `iscc_test_<pid>_<name>`. Including the process id keeps
/// two test processes that run at the same time (for example parallel CI jobs
/// sharing one temp directory) from overwriting or deleting each other's
/// files. Within one process, callers must still pass a distinct `name` each.
///
/// Panics if the file cannot be written. The caller is responsible for
/// removing the file.
fn write_temp_file(name: &str, data: &[u8]) -> std::path::PathBuf {
    let file_name = format!("iscc_test_{}_{name}", std::process::id());
    let path = std::env::temp_dir().join(file_name);
    std::fs::write(&path, data).expect("failed to write temp file");
    path
}
2118
/// `gen_sum_code_v0` on a file must agree with composing `gen_data_code_v0`,
/// `gen_instance_code_v0` and `gen_iscc_code_v0` over the same bytes: same ISCC
/// string, same data hash, same file size.
#[test]
fn test_gen_sum_code_v0_equivalence() {
    let data = b"Hello, ISCC World! This is a test of gen_sum_code_v0.";
    let path = write_temp_file("sum_equiv", data);

    let from_file = gen_sum_code_v0(&path, 64, false).unwrap();

    // Reference result built from the individual generators.
    let data_code = gen_data_code_v0(data, 64).unwrap();
    let instance_code = gen_instance_code_v0(data, 64).unwrap();
    let units = [data_code.iscc.as_str(), instance_code.iscc.as_str()];
    let composite = gen_iscc_code_v0(&units, false).unwrap();

    assert_eq!(from_file.iscc, composite.iscc);
    assert_eq!(from_file.datahash, instance_code.datahash);
    assert_eq!(from_file.filesize, instance_code.filesize);
    assert_eq!(from_file.filesize, data.len() as u64);

    // Best-effort cleanup; a failure to delete is ignored.
    std::fs::remove_file(&path).ok();
}
2139
/// For an empty file, `gen_sum_code_v0` must agree with the composed
/// generators run on empty input and report a file size of 0.
#[test]
fn test_gen_sum_code_v0_empty_file() {
    let path = write_temp_file("sum_empty", b"");

    let from_file = gen_sum_code_v0(&path, 64, false).unwrap();

    // Reference result built from the individual generators.
    let data_code = gen_data_code_v0(b"", 64).unwrap();
    let instance_code = gen_instance_code_v0(b"", 64).unwrap();
    let units = [data_code.iscc.as_str(), instance_code.iscc.as_str()];
    let composite = gen_iscc_code_v0(&units, false).unwrap();

    assert_eq!(from_file.iscc, composite.iscc);
    assert_eq!(from_file.datahash, instance_code.datahash);
    assert_eq!(from_file.filesize, 0);

    // Best-effort cleanup; a failure to delete is ignored.
    std::fs::remove_file(&path).ok();
}
2157
/// A path that does not exist must produce an error whose message contains
/// "Cannot open file".
#[test]
fn test_gen_sum_code_v0_file_not_found() {
    let missing = std::env::temp_dir().join("iscc_test_nonexistent_file_xyz");
    let outcome = gen_sum_code_v0(&missing, 64, false);
    assert!(outcome.is_err());
    let err_msg = outcome.unwrap_err().to_string();
    let mentions_open_failure = err_msg.contains("Cannot open file");
    assert!(
        mentions_open_failure,
        "error message should mention file open failure: {err_msg}"
    );
}
2169
/// Compares `gen_sum_code_v0` with the third argument `false` and `true`.
///
/// At 64 bits both calls return the same ISCC string. At 128 bits the ISCC
/// strings differ, while the data hash and file size stay the same.
#[test]
fn test_gen_sum_code_v0_wide_mode() {
    let data = b"Testing wide mode for gen_sum_code_v0 function.";
    let path = write_temp_file("sum_wide", data);

    let narrow_64 = gen_sum_code_v0(&path, 64, false).unwrap();
    let wide_64 = gen_sum_code_v0(&path, 64, true).unwrap();

    assert_eq!(narrow_64.iscc, wide_64.iscc);

    let narrow_128 = gen_sum_code_v0(&path, 128, false).unwrap();
    let wide_128 = gen_sum_code_v0(&path, 128, true).unwrap();
    assert_ne!(narrow_128.iscc, wide_128.iscc);

    assert_eq!(narrow_128.datahash, wide_128.datahash);
    assert_eq!(narrow_128.filesize, wide_128.filesize);

    // Best-effort cleanup; a failure to delete is ignored.
    std::fs::remove_file(&path).ok();
}
2192
/// At 64 bits, `gen_sum_code_v0` on a file returns the same ISCC string as the
/// composed generators run on the same bytes.
#[test]
fn test_gen_sum_code_v0_bits_64() {
    let data = b"Testing 64-bit gen_sum_code_v0.";
    let path = write_temp_file("sum_bits64", data);

    let from_file = gen_sum_code_v0(&path, 64, false).unwrap();

    // Reference result built from the individual generators.
    let data_code = gen_data_code_v0(data, 64).unwrap();
    let instance_code = gen_instance_code_v0(data, 64).unwrap();
    let units = [data_code.iscc.as_str(), instance_code.iscc.as_str()];
    let composite = gen_iscc_code_v0(&units, false).unwrap();

    assert_eq!(from_file.iscc, composite.iscc);

    // Best-effort cleanup; a failure to delete is ignored.
    std::fs::remove_file(&path).ok();
}
2209
/// At 128 bits, `gen_sum_code_v0` on a file matches the composed generators run
/// on the same bytes: same ISCC string, same data hash, and a file size equal
/// to the input length.
#[test]
fn test_gen_sum_code_v0_bits_128() {
    let data = b"Testing 128-bit gen_sum_code_v0.";
    let path = write_temp_file("sum_bits128", data);

    let from_file = gen_sum_code_v0(&path, 128, false).unwrap();

    // Reference result built from the individual generators.
    let data_code = gen_data_code_v0(data, 128).unwrap();
    let instance_code = gen_instance_code_v0(data, 128).unwrap();
    let units = [data_code.iscc.as_str(), instance_code.iscc.as_str()];
    let composite = gen_iscc_code_v0(&units, false).unwrap();

    assert_eq!(from_file.iscc, composite.iscc);
    assert_eq!(from_file.datahash, instance_code.datahash);
    assert_eq!(from_file.filesize, data.len() as u64);

    // Best-effort cleanup; a failure to delete is ignored.
    std::fs::remove_file(&path).ok();
}
2228
/// For a 50_000-byte file filled with the repeating pattern `0..=255`,
/// `gen_sum_code_v0` matches the composed generators run on the same bytes.
#[test]
fn test_gen_sum_code_v0_large_data() {
    let data: Vec<u8> = (0..50_000).map(|idx| (idx % 256) as u8).collect();
    let path = write_temp_file("sum_large", &data);

    let from_file = gen_sum_code_v0(&path, 64, false).unwrap();

    // Reference result built from the individual generators.
    let data_code = gen_data_code_v0(&data, 64).unwrap();
    let instance_code = gen_instance_code_v0(&data, 64).unwrap();
    let units = [data_code.iscc.as_str(), instance_code.iscc.as_str()];
    let composite = gen_iscc_code_v0(&units, false).unwrap();

    assert_eq!(from_file.iscc, composite.iscc);
    assert_eq!(from_file.datahash, instance_code.datahash);
    assert_eq!(from_file.filesize, data.len() as u64);

    // Best-effort cleanup; a failure to delete is ignored.
    std::fs::remove_file(&path).ok();
}
2248}