Skip to main content

iscc_lib/
codec.rs

1//! ISCC codec: type enums, header encoding/decoding, base32, and component encoding.
2//!
3//! Provides the foundational encoding primitives that all `gen_*_v0` functions
4//! depend on to produce ISCC-encoded output strings. This is a Tier 2 module —
5//! available to Rust consumers but not exposed through FFI bindings.
6
7use crate::{IsccError, IsccResult};
8
9// ---- Type Enums ----
10
/// Top-level type tag of an ISCC (the first field of every header).
///
/// Each discriminant is the integer written into the header; the values are
/// kept identical to the `MT` enum of the `iscc-core` Python reference.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
#[repr(u8)]
pub enum MainType {
    /// Meta unit (optional unit of a composite ISCC-CODE).
    Meta = 0,
    /// Semantic unit (optional unit of a composite ISCC-CODE).
    Semantic = 1,
    /// Content unit (optional unit of a composite ISCC-CODE).
    Content = 2,
    /// Data unit (mandatory unit of a composite ISCC-CODE).
    Data = 3,
    /// Instance unit (mandatory unit of a composite ISCC-CODE).
    Instance = 4,
    /// Composite ISCC-CODE; its length field holds unit flags, not a bit count.
    Iscc = 5,
    /// ID type; its length field counts bytes beyond the first 64 bits.
    Id = 6,
    /// Flake type; uses the same length encoding as the unit types.
    Flake = 7,
}
26
27impl TryFrom<u8> for MainType {
28    type Error = IsccError;
29
30    fn try_from(value: u8) -> Result<Self, Self::Error> {
31        match value {
32            0 => Ok(Self::Meta),
33            1 => Ok(Self::Semantic),
34            2 => Ok(Self::Content),
35            3 => Ok(Self::Data),
36            4 => Ok(Self::Instance),
37            5 => Ok(Self::Iscc),
38            6 => Ok(Self::Id),
39            7 => Ok(Self::Flake),
40            _ => Err(IsccError::InvalidInput(format!(
41                "invalid MainType: {value}"
42            ))),
43        }
44    }
45}
46
/// SubType field of an ISCC header.
///
/// A single enum covers every subtype namespace of the reference
/// implementation (ST, ST_CC, ST_ISCC); which reading applies depends on the
/// MainType it accompanies. Discriminants equal the integer values used by
/// the `iscc-core` Python reference.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
pub enum SubType {
    /// General "no subtype" marker; read as Text in the ST_CC context.
    None = 0,
    /// Image content.
    Image = 1,
    /// Audio content.
    Audio = 2,
    /// Video content.
    Video = 3,
    /// Mixed content.
    Mixed = 4,
    /// ISCC composite summary: only the 2 mandatory units, no optional ones.
    Sum = 5,
    /// ISCC composite without a specific content type (3+ units, mixed subtypes).
    IsccNone = 6,
    /// ISCC wide mode: 256-bit Data+Instance composite.
    Wide = 7,
}
72
73impl SubType {
74    /// Alias for `None` (value 0) in Content-Code / Semantic-Code context.
75    pub const TEXT: Self = Self::None;
76}
77
78impl TryFrom<u8> for SubType {
79    type Error = IsccError;
80
81    fn try_from(value: u8) -> Result<Self, Self::Error> {
82        match value {
83            0 => Ok(Self::None),
84            1 => Ok(Self::Image),
85            2 => Ok(Self::Audio),
86            3 => Ok(Self::Video),
87            4 => Ok(Self::Mixed),
88            5 => Ok(Self::Sum),
89            6 => Ok(Self::IsccNone),
90            7 => Ok(Self::Wide),
91            _ => Err(IsccError::InvalidInput(format!("invalid SubType: {value}"))),
92        }
93    }
94}
95
/// Version field of an ISCC header.
///
/// `V0` is the only version this enum defines.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
pub enum Version {
    /// Version 0.
    V0 = 0,
}
102
103impl TryFrom<u8> for Version {
104    type Error = IsccError;
105
106    fn try_from(value: u8) -> Result<Self, Self::Error> {
107        match value {
108            0 => Ok(Self::V0),
109            _ => Err(IsccError::InvalidInput(format!("invalid Version: {value}"))),
110        }
111    }
112}
113
114// ---- Bit Manipulation Helpers ----
115
/// Return whether the bit at `bit_pos` is set, counting bits MSB-first.
///
/// Bit 0 is the most significant bit of `data[0]`, bit 8 the most significant
/// bit of `data[1]`, and so on. Panics if `bit_pos` lies beyond the slice.
fn get_bit(data: &[u8], bit_pos: usize) -> bool {
    // Offset 0 within a byte selects the top bit (0x80), offset 7 the lowest.
    let mask = 0x80u8 >> (bit_pos % 8);
    data[bit_pos / 8] & mask != 0
}
122
123/// Extract `count` bits starting at `bit_pos` as a u32 (MSB-first).
124fn extract_bits(data: &[u8], bit_pos: usize, count: usize) -> u32 {
125    let mut value = 0u32;
126    for i in 0..count {
127        value = (value << 1) | u32::from(get_bit(data, bit_pos + i));
128    }
129    value
130}
131
/// Pack a slice of bits into a `u32`, treating the first element as the most
/// significant bit (big-endian). An empty slice yields 0.
#[cfg(test)]
fn bits_to_u32(bits: &[bool]) -> u32 {
    let mut value = 0u32;
    for &bit in bits {
        value = (value << 1) | u32::from(bit);
    }
    value
}
137
/// Expand bytes into individual bits, most significant bit of each byte first.
///
/// The result holds exactly eight entries per input byte.
#[cfg(test)]
fn bytes_to_bits(bytes: &[u8]) -> Vec<bool> {
    let mut bits = Vec::new();
    for &byte in bytes {
        // Walk the shifts from 7 down to 0 so the MSB is emitted first.
        bits.extend((0..8).rev().map(|shift| (byte >> shift) & 1 == 1));
    }
    bits
}
146
/// Pack bits into bytes, first bit becoming the MSB of the first byte.
///
/// When the bit count is not a multiple of 8, the final byte is completed
/// with zero bits on the right.
fn bits_to_bytes(bits: &[bool]) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(bits.len().div_ceil(8));
    for chunk in bits.chunks(8) {
        let mut byte = 0u8;
        for (offset, &bit) in chunk.iter().enumerate() {
            if bit {
                // Offset 0 is the most significant bit of the byte.
                byte |= 0x80u8 >> offset;
            }
        }
        bytes.push(byte);
    }
    bytes
}
158
159// ---- Varnibble Encoding ----
160
161/// Encode an integer as a variable-length nibble (varnibble) bit sequence.
162///
163/// Encoding scheme:
164/// - `0xxx` (4 bits, 1 nibble): values 0–7
165/// - `10xxxxxx` (8 bits, 2 nibbles): values 8–71
166/// - `110xxxxxxxxx` (12 bits, 3 nibbles): values 72–583
167/// - `1110xxxxxxxxxxxx` (16 bits, 4 nibbles): values 584–4679
168fn encode_varnibble(value: u32) -> IsccResult<Vec<bool>> {
169    match value {
170        0..=7 => {
171            // 4 bits: value fits directly (leading 0 implicit in 4-bit encoding)
172            Ok((0..4).rev().map(|i| (value >> i) & 1 == 1).collect())
173        }
174        8..=71 => {
175            // 8 bits: prefix 10 + 6 data bits for (value - 8)
176            let v = value - 8;
177            let mut bits = vec![true, false];
178            bits.extend((0..6).rev().map(|i| (v >> i) & 1 == 1));
179            Ok(bits)
180        }
181        72..=583 => {
182            // 12 bits: prefix 110 + 9 data bits for (value - 72)
183            let v = value - 72;
184            let mut bits = vec![true, true, false];
185            bits.extend((0..9).rev().map(|i| (v >> i) & 1 == 1));
186            Ok(bits)
187        }
188        584..=4679 => {
189            // 16 bits: prefix 1110 + 12 data bits for (value - 584)
190            let v = value - 584;
191            let mut bits = vec![true, true, true, false];
192            bits.extend((0..12).rev().map(|i| (v >> i) & 1 == 1));
193            Ok(bits)
194        }
195        _ => Err(IsccError::InvalidInput(format!(
196            "varnibble value out of range (0-4679): {value}"
197        ))),
198    }
199}
200
201/// Decode the first varnibble from a byte slice at the given bit position.
202///
203/// Operates directly on `&[u8]` with bitwise extraction, avoiding any
204/// intermediate `Vec<bool>` allocation. Returns the decoded integer and
205/// the number of bits consumed.
206fn decode_varnibble_from_bytes(data: &[u8], bit_pos: usize) -> IsccResult<(u32, usize)> {
207    let available = data.len() * 8 - bit_pos;
208    if available < 4 {
209        return Err(IsccError::InvalidInput(
210            "insufficient bits for varnibble".into(),
211        ));
212    }
213
214    if !get_bit(data, bit_pos) {
215        // 0xxx — 4 bits, values 0–7
216        Ok((extract_bits(data, bit_pos, 4), 4))
217    } else if available >= 8 && !get_bit(data, bit_pos + 1) {
218        // 10xxxxxx — 8 bits, values 8–71
219        Ok((extract_bits(data, bit_pos + 2, 6) + 8, 8))
220    } else if available >= 12 && !get_bit(data, bit_pos + 2) {
221        // 110xxxxxxxxx — 12 bits, values 72–583
222        Ok((extract_bits(data, bit_pos + 3, 9) + 72, 12))
223    } else if available >= 16 && !get_bit(data, bit_pos + 3) {
224        // 1110xxxxxxxxxxxx — 16 bits, values 584–4679
225        Ok((extract_bits(data, bit_pos + 4, 12) + 584, 16))
226    } else {
227        Err(IsccError::InvalidInput(
228            "invalid varnibble prefix or insufficient bits".into(),
229        ))
230    }
231}
232
233// ---- Header Encoding ----
234
235/// Encode ISCC header fields into bytes.
236///
237/// Concatenates varnibble-encoded MainType, SubType, Version, and length,
238/// then pads to byte boundary with zero bits on the right.
239/// Result is 2 bytes minimum (typical case), up to 8 bytes maximum.
240pub fn encode_header(
241    mtype: MainType,
242    stype: SubType,
243    version: Version,
244    length: u32,
245) -> IsccResult<Vec<u8>> {
246    let mut bits = Vec::new();
247    bits.extend(encode_varnibble(mtype as u32)?);
248    bits.extend(encode_varnibble(stype as u32)?);
249    bits.extend(encode_varnibble(version as u32)?);
250    bits.extend(encode_varnibble(length)?);
251
252    // Pad to byte boundary with zero bits (equivalent to bitarray.fill())
253    let remainder = bits.len() % 8;
254    if remainder != 0 {
255        bits.resize(bits.len() + (8 - remainder), false);
256    }
257
258    Ok(bits_to_bytes(&bits))
259}
260
261/// Decode ISCC header from bytes.
262///
263/// Operates directly on `&[u8]` with bitwise extraction, avoiding any
264/// intermediate `Vec<bool>` allocation. Returns `(MainType, SubType,
265/// Version, length, tail_bytes)` where `tail_bytes` contains any
266/// remaining data after the header.
267pub fn decode_header(data: &[u8]) -> IsccResult<(MainType, SubType, Version, u32, Vec<u8>)> {
268    let mut bit_pos = 0;
269
270    let (mtype_val, consumed) = decode_varnibble_from_bytes(data, bit_pos)?;
271    bit_pos += consumed;
272
273    let (stype_val, consumed) = decode_varnibble_from_bytes(data, bit_pos)?;
274    bit_pos += consumed;
275
276    let (version_val, consumed) = decode_varnibble_from_bytes(data, bit_pos)?;
277    bit_pos += consumed;
278
279    let (length, consumed) = decode_varnibble_from_bytes(data, bit_pos)?;
280    bit_pos += consumed;
281
282    // Strip 4-bit zero padding if header bits are not byte-aligned.
283    // Since each varnibble is a multiple of 4 bits, misalignment is always 4 bits.
284    if bit_pos % 8 != 0 && bit_pos + 4 <= data.len() * 8 && extract_bits(data, bit_pos, 4) == 0 {
285        bit_pos += 4;
286    }
287
288    // Advance to next byte boundary for tail extraction
289    let tail_byte_start = bit_pos.div_ceil(8);
290    let tail = if tail_byte_start < data.len() {
291        data[tail_byte_start..].to_vec()
292    } else {
293        vec![]
294    };
295
296    let mtype = MainType::try_from(mtype_val as u8)?;
297    let stype = SubType::try_from(stype_val as u8)?;
298    let version = Version::try_from(version_val as u8)?;
299
300    Ok((mtype, stype, version, length, tail))
301}
302
303// ---- Length Encoding ----
304
305/// Encode bit length to header length field value.
306///
307/// Semantics depend on MainType:
308/// - META/SEMANTIC/CONTENT/DATA/INSTANCE/FLAKE: `(bit_length / 32) - 1`
309/// - ISCC: pass-through (0–7, unit composition flags)
310/// - ID: `(bit_length - 64) / 8`
311pub fn encode_length(mtype: MainType, length: u32) -> IsccResult<u32> {
312    match mtype {
313        MainType::Meta
314        | MainType::Semantic
315        | MainType::Content
316        | MainType::Data
317        | MainType::Instance
318        | MainType::Flake => {
319            if length >= 32 && length % 32 == 0 {
320                Ok(length / 32 - 1)
321            } else {
322                Err(IsccError::InvalidInput(format!(
323                    "invalid length {length} for {mtype:?} (must be multiple of 32, >= 32)"
324                )))
325            }
326        }
327        MainType::Iscc => {
328            if length <= 7 {
329                Ok(length)
330            } else {
331                Err(IsccError::InvalidInput(format!(
332                    "invalid length {length} for ISCC (must be 0-7)"
333                )))
334            }
335        }
336        MainType::Id => {
337            if (64..=96).contains(&length) && (length - 64) % 8 == 0 {
338                Ok((length - 64) / 8)
339            } else {
340                Err(IsccError::InvalidInput(format!(
341                    "invalid length {length} for ID (must be 64-96, step 8)"
342                )))
343            }
344        }
345    }
346}
347
348/// Decode header length field to actual bit length.
349///
350/// Inverse of `encode_length`. Returns the number of bits in the digest.
351/// - META/SEMANTIC/CONTENT/DATA/INSTANCE/FLAKE: `(length + 1) * 32`
352/// - ISCC + Wide: 256
353/// - ISCC + other: `popcount(length) * 64 + 128`
354/// - ID: `length * 8 + 64`
355pub fn decode_length(mtype: MainType, length: u32, stype: SubType) -> u32 {
356    match mtype {
357        MainType::Meta
358        | MainType::Semantic
359        | MainType::Content
360        | MainType::Data
361        | MainType::Instance
362        | MainType::Flake => (length + 1) * 32,
363        MainType::Iscc => {
364            if stype == SubType::Wide {
365                256
366            } else {
367                length.count_ones() * 64 + 128
368            }
369        }
370        MainType::Id => length * 8 + 64,
371    }
372}
373
374// ---- Unit Encoding ----
375
376/// Encode optional ISCC-UNIT MainTypes as a unit combination index (0–7).
377///
378/// Maps the optional units (Meta, Semantic, Content) present in a composite
379/// ISCC-CODE to a bitfield index. Data and Instance are mandatory and must
380/// not be included. The bitfield pattern is:
381/// bit 0 = Content, bit 1 = Semantic, bit 2 = Meta.
382pub fn encode_units(main_types: &[MainType]) -> IsccResult<u32> {
383    let mut result = 0u32;
384    for &mt in main_types {
385        match mt {
386            MainType::Content => result |= 1,
387            MainType::Semantic => result |= 2,
388            MainType::Meta => result |= 4,
389            _ => {
390                return Err(IsccError::InvalidInput(format!(
391                    "{mt:?} is not a valid optional unit type"
392                )));
393            }
394        }
395    }
396    Ok(result)
397}
398
399/// Decode a unit combination index (0–7) to a sorted list of optional MainTypes.
400///
401/// Inverse of `encode_units`. Decodes the 3-bit bitfield:
402/// bit 0 = Content, bit 1 = Semantic, bit 2 = Meta. Results are returned
403/// in MainType discriminant order (Meta, Semantic, Content) so they are
404/// automatically sorted.
405pub fn decode_units(unit_id: u32) -> IsccResult<Vec<MainType>> {
406    if unit_id > 7 {
407        return Err(IsccError::InvalidInput(format!(
408            "invalid unit_id: {unit_id} (must be 0-7)"
409        )));
410    }
411    let mut result = Vec::new();
412    if unit_id & 4 != 0 {
413        result.push(MainType::Meta);
414    }
415    if unit_id & 2 != 0 {
416        result.push(MainType::Semantic);
417    }
418    if unit_id & 1 != 0 {
419        result.push(MainType::Content);
420    }
421    Ok(result)
422}
423
424// ---- Base32 Encoding ----
425
426/// Encode bytes as base32 (RFC 4648, uppercase, no padding).
427pub fn encode_base32(data: &[u8]) -> String {
428    data_encoding::BASE32_NOPAD.encode(data)
429}
430
431/// Decode base32 string to bytes (case-insensitive, no padding expected).
432pub fn decode_base32(code: &str) -> IsccResult<Vec<u8>> {
433    let upper = code.to_uppercase();
434    data_encoding::BASE32_NOPAD
435        .decode(upper.as_bytes())
436        .map_err(|e| IsccError::InvalidInput(format!("base32 decode error: {e}")))
437}
438
439// ---- Base64 Encoding ----
440
441/// Encode bytes as base64url (RFC 4648 §5, no padding).
442pub fn encode_base64(data: &[u8]) -> String {
443    data_encoding::BASE64URL_NOPAD.encode(data)
444}
445
446// ---- Component Encoding ----
447
448/// Encode an ISCC-UNIT with header and body as a base32 string.
449///
450/// Produces the base32-encoded string (without "ISCC:" prefix). Callers
451/// add the prefix when constructing the final ISCC string.
452///
453/// Note: ISCC-CODEs (MainType::Iscc) are not encoded via this function —
454/// `gen_iscc_code_v0` constructs the composite header directly.
455pub fn encode_component(
456    mtype: MainType,
457    stype: SubType,
458    version: Version,
459    bit_length: u32,
460    digest: &[u8],
461) -> IsccResult<String> {
462    if mtype == MainType::Iscc {
463        return Err(IsccError::InvalidInput(
464            "ISCC MainType is not a unit; use gen_iscc_code_v0 instead".into(),
465        ));
466    }
467
468    let encoded_length = encode_length(mtype, bit_length)?;
469    let nbytes = (bit_length / 8) as usize;
470    let header = encode_header(mtype, stype, version, encoded_length)?;
471    let body = &digest[..nbytes.min(digest.len())];
472
473    let mut component = header;
474    component.extend_from_slice(body);
475
476    Ok(encode_base32(&component))
477}
478
479/// Decompose a composite ISCC-CODE or ISCC sequence into individual ISCC-UNITs.
480///
481/// Accepts a normalized ISCC-CODE or a concatenated sequence of ISCC-UNITs.
482/// The optional "ISCC:" prefix is stripped before decoding. Returns a list
483/// of base32-encoded ISCC-UNIT strings (without "ISCC:" prefix).
484pub fn iscc_decompose(iscc_code: &str) -> IsccResult<Vec<String>> {
485    let clean = iscc_code.strip_prefix("ISCC:").unwrap_or(iscc_code);
486    let mut raw_code = decode_base32(clean)?;
487    let mut components = Vec::new();
488
489    while !raw_code.is_empty() {
490        let (mt, st, vs, ln, body) = decode_header(&raw_code)?;
491
492        // Standard ISCC-UNIT with tail continuation
493        if mt != MainType::Iscc {
494            let ln_bits = decode_length(mt, ln, st);
495            let nbytes = (ln_bits / 8) as usize;
496            if body.len() < nbytes {
497                return Err(IsccError::InvalidInput(format!(
498                    "truncated ISCC body: expected {nbytes} bytes, got {}",
499                    body.len()
500                )));
501            }
502            let code = encode_component(mt, st, vs, ln_bits, &body[..nbytes])?;
503            components.push(code);
504            raw_code = body[nbytes..].to_vec();
505            continue;
506        }
507
508        // ISCC-CODE: decode into constituent units
509        let main_types = decode_units(ln)?;
510
511        // Wide mode: 128-bit Data-Code + 128-bit Instance-Code
512        if st == SubType::Wide {
513            if body.len() < 32 {
514                return Err(IsccError::InvalidInput(format!(
515                    "truncated ISCC body: expected 32 bytes, got {}",
516                    body.len()
517                )));
518            }
519            let data_code = encode_component(MainType::Data, SubType::None, vs, 128, &body[..16])?;
520            let instance_code =
521                encode_component(MainType::Instance, SubType::None, vs, 128, &body[16..32])?;
522            components.push(data_code);
523            components.push(instance_code);
524            break;
525        }
526
527        // Non-wide ISCC-CODE: total body = dynamic units × 8 + Data 8 + Instance 8
528        let expected_body = main_types.len() * 8 + 16;
529        if body.len() < expected_body {
530            return Err(IsccError::InvalidInput(format!(
531                "truncated ISCC body: expected {expected_body} bytes, got {}",
532                body.len()
533            )));
534        }
535
536        // Rebuild dynamic units (Meta, Semantic, Content)
537        for (idx, &mtype) in main_types.iter().enumerate() {
538            let stype = if mtype == MainType::Meta {
539                SubType::None
540            } else {
541                st
542            };
543            let code = encode_component(mtype, stype, vs, 64, &body[idx * 8..])?;
544            components.push(code);
545        }
546
547        // Rebuild static units (Data-Code, Instance-Code)
548        let data_code = encode_component(
549            MainType::Data,
550            SubType::None,
551            vs,
552            64,
553            &body[body.len() - 16..body.len() - 8],
554        )?;
555        let instance_code = encode_component(
556            MainType::Instance,
557            SubType::None,
558            vs,
559            64,
560            &body[body.len() - 8..],
561        )?;
562        components.push(data_code);
563        components.push(instance_code);
564        break;
565    }
566
567    Ok(components)
568}
569
570#[cfg(test)]
571mod tests {
572    use super::*;
573
574    // ---- Varnibble roundtrip tests ----
575
576    #[test]
577    fn test_varnibble_roundtrip() {
578        let test_values = [0, 1, 7, 8, 71, 72, 583, 584, 4679];
579        for &value in &test_values {
580            let bits = encode_varnibble(value).unwrap();
581            let bytes = bits_to_bytes(&bits);
582            let (decoded, consumed) = decode_varnibble_from_bytes(&bytes, 0).unwrap();
583            assert_eq!(decoded, value, "roundtrip failed for value {value}");
584            assert_eq!(consumed, bits.len(), "consumed mismatch for value {value}");
585        }
586    }
587
588    #[test]
589    fn test_varnibble_bit_lengths() {
590        // 0-7: 4 bits (1 nibble)
591        assert_eq!(encode_varnibble(0).unwrap().len(), 4);
592        assert_eq!(encode_varnibble(7).unwrap().len(), 4);
593        // 8-71: 8 bits (2 nibbles)
594        assert_eq!(encode_varnibble(8).unwrap().len(), 8);
595        assert_eq!(encode_varnibble(71).unwrap().len(), 8);
596        // 72-583: 12 bits (3 nibbles)
597        assert_eq!(encode_varnibble(72).unwrap().len(), 12);
598        assert_eq!(encode_varnibble(583).unwrap().len(), 12);
599        // 584-4679: 16 bits (4 nibbles)
600        assert_eq!(encode_varnibble(584).unwrap().len(), 16);
601        assert_eq!(encode_varnibble(4679).unwrap().len(), 16);
602    }
603
604    #[test]
605    fn test_varnibble_out_of_range() {
606        assert!(encode_varnibble(4680).is_err());
607    }
608
609    #[test]
610    fn test_varnibble_boundary_values() {
611        // Verify exact bit patterns at boundaries
612        let bits_0 = encode_varnibble(0).unwrap();
613        assert_eq!(bits_0, vec![false, false, false, false]); // 0000
614
615        let bits_7 = encode_varnibble(7).unwrap();
616        assert_eq!(bits_7, vec![false, true, true, true]); // 0111
617
618        let bits_8 = encode_varnibble(8).unwrap();
619        assert_eq!(
620            bits_8,
621            vec![true, false, false, false, false, false, false, false]
622        ); // 10 000000
623    }
624
625    // ---- Bitwise extraction tests ----
626
627    #[test]
628    fn test_extract_bits_basic() {
629        // 0xA5 = 1010_0101 in binary
630        let data = [0xA5u8];
631        assert_eq!(extract_bits(&data, 0, 4), 0b1010); // first nibble
632        assert_eq!(extract_bits(&data, 4, 4), 0b0101); // second nibble
633        assert_eq!(extract_bits(&data, 0, 8), 0xA5); // full byte
634        assert_eq!(extract_bits(&data, 1, 3), 0b010); // bits 1-3
635        assert_eq!(extract_bits(&data, 0, 1), 1); // MSB
636        assert_eq!(extract_bits(&data, 7, 1), 1); // LSB
637
638        // Multi-byte: 0xFF 0x00 = 1111_1111 0000_0000
639        let data2 = [0xFF, 0x00];
640        assert_eq!(extract_bits(&data2, 0, 8), 0xFF);
641        assert_eq!(extract_bits(&data2, 8, 8), 0x00);
642        assert_eq!(extract_bits(&data2, 4, 8), 0xF0); // crossing byte boundary
643        assert_eq!(extract_bits(&data2, 6, 4), 0b1100); // crossing byte boundary
644    }
645
646    #[test]
647    fn test_decode_varnibble_from_bytes_boundary_values() {
648        // Test decoding at non-zero bit offsets within a byte slice.
649        // Encode two varnibbles into a single byte sequence and decode both.
650
651        // varnibble(3) = 0011 (4 bits) + varnibble(8) = 10_000000 (8 bits) = 12 bits
652        let bits_3 = encode_varnibble(3).unwrap();
653        let bits_8 = encode_varnibble(8).unwrap();
654        let mut combined_bits = bits_3.clone();
655        combined_bits.extend(&bits_8);
656        let bytes = bits_to_bytes(&combined_bits);
657
658        // Decode first varnibble at bit 0
659        let (val1, consumed1) = decode_varnibble_from_bytes(&bytes, 0).unwrap();
660        assert_eq!(val1, 3);
661        assert_eq!(consumed1, 4);
662
663        // Decode second varnibble at bit 4 (non-zero offset)
664        let (val2, consumed2) = decode_varnibble_from_bytes(&bytes, 4).unwrap();
665        assert_eq!(val2, 8);
666        assert_eq!(consumed2, 8);
667
668        // Test with a 3-nibble value at offset
669        // varnibble(0) = 0000 (4 bits) + varnibble(72) = 110_000000000 (12 bits)
670        let bits_0 = encode_varnibble(0).unwrap();
671        let bits_72 = encode_varnibble(72).unwrap();
672        let mut combined2 = bits_0;
673        combined2.extend(&bits_72);
674        let bytes2 = bits_to_bytes(&combined2);
675
676        let (val3, consumed3) = decode_varnibble_from_bytes(&bytes2, 4).unwrap();
677        assert_eq!(val3, 72);
678        assert_eq!(consumed3, 12);
679
680        // Test insufficient bits at offset
681        let single_byte = [0x00u8];
682        let result = decode_varnibble_from_bytes(&single_byte, 6);
683        assert!(result.is_err(), "should fail with only 2 bits available");
684    }
685
686    // ---- Header encoding tests ----
687
688    #[test]
689    fn test_encode_header_meta_v0() {
690        // encode_header(META=0, NONE=0, V0=0, length=1) → 2 bytes
691        let header = encode_header(MainType::Meta, SubType::None, Version::V0, 1).unwrap();
692        assert_eq!(header, vec![0x00, 0x01]);
693    }
694
695    #[test]
696    fn test_encode_header_with_padding() {
697        // encode_header(META=0, NONE=0, V0=0, length=8)
698        // varnibble(0)=4b + varnibble(0)=4b + varnibble(0)=4b + varnibble(8)=8b = 20 bits
699        // Padded to 24 bits = 3 bytes
700        let header = encode_header(MainType::Meta, SubType::None, Version::V0, 8).unwrap();
701        assert_eq!(header.len(), 3);
702        // bits: 0000 0000 0000 10|000000 0000
703        //       ^^^^ ^^^^ ^^^^ ^^^^^^^^ ^^^^(pad)
704        assert_eq!(header, vec![0x00, 0x08, 0x00]);
705    }
706
707    #[test]
708    fn test_encode_header_data_type() {
709        // DATA=3, NONE=0, V0=0, length=1
710        let header = encode_header(MainType::Data, SubType::None, Version::V0, 1).unwrap();
711        // varnibble(3)=0011, varnibble(0)=0000, varnibble(0)=0000, varnibble(1)=0001
712        // bits: 0011 0000 0000 0001
713        assert_eq!(header, vec![0x30, 0x01]);
714    }
715
716    #[test]
717    fn test_encode_header_instance_type() {
718        // INSTANCE=4, NONE=0, V0=0, length=1
719        let header = encode_header(MainType::Instance, SubType::None, Version::V0, 1).unwrap();
720        // varnibble(4)=0100, varnibble(0)=0000, varnibble(0)=0000, varnibble(1)=0001
721        // bits: 0100 0000 0000 0001
722        assert_eq!(header, vec![0x40, 0x01]);
723    }
724
725    #[test]
726    fn test_decode_header_roundtrip_all_main_types() {
727        let main_types = [
728            MainType::Meta,
729            MainType::Semantic,
730            MainType::Content,
731            MainType::Data,
732            MainType::Instance,
733            MainType::Iscc,
734            MainType::Id,
735            MainType::Flake,
736        ];
737
738        for &mtype in &main_types {
739            let header = encode_header(mtype, SubType::None, Version::V0, 1).unwrap();
740            let (dec_mtype, dec_stype, dec_version, dec_length, tail) =
741                decode_header(&header).unwrap();
742            assert_eq!(dec_mtype, mtype, "MainType mismatch for {mtype:?}");
743            assert_eq!(dec_stype, SubType::None);
744            assert_eq!(dec_version, Version::V0);
745            assert_eq!(dec_length, 1);
746            assert!(tail.is_empty(), "unexpected tail for {mtype:?}");
747        }
748    }
749
750    #[test]
751    fn test_decode_header_with_tail() {
752        // Simulate header + 8 bytes body
753        let header = encode_header(MainType::Meta, SubType::None, Version::V0, 1).unwrap();
754        let body = vec![0xAA, 0xBB, 0xCC, 0xDD, 0x11, 0x22, 0x33, 0x44];
755        let mut data = header;
756        data.extend_from_slice(&body);
757
758        let (mtype, stype, version, length, tail) = decode_header(&data).unwrap();
759        assert_eq!(mtype, MainType::Meta);
760        assert_eq!(stype, SubType::None);
761        assert_eq!(version, Version::V0);
762        assert_eq!(length, 1);
763        assert_eq!(tail, body);
764    }
765
766    #[test]
767    fn test_decode_header_with_padding_and_tail() {
768        // Header with padding (3 bytes) + body
769        let header = encode_header(MainType::Meta, SubType::None, Version::V0, 8).unwrap();
770        assert_eq!(header.len(), 3); // 20 bits padded to 24
771
772        let body = vec![0xFF, 0xEE];
773        let mut data = header;
774        data.extend_from_slice(&body);
775
776        let (mtype, _stype, _version, length, tail) = decode_header(&data).unwrap();
777        assert_eq!(mtype, MainType::Meta);
778        assert_eq!(length, 8);
779        assert_eq!(tail, body);
780    }
781
782    #[test]
783    fn test_decode_header_subtypes() {
784        // Test with non-zero subtype
785        let header = encode_header(MainType::Content, SubType::Image, Version::V0, 1).unwrap();
786        let (mtype, stype, version, length, _tail) = decode_header(&header).unwrap();
787        assert_eq!(mtype, MainType::Content);
788        assert_eq!(stype, SubType::Image);
789        assert_eq!(version, Version::V0);
790        assert_eq!(length, 1);
791    }
792
793    // ---- Length encoding tests ----
794
795    #[test]
796    fn test_encode_length_standard_types() {
797        // (bit_length / 32) - 1
798        assert_eq!(encode_length(MainType::Meta, 32).unwrap(), 0);
799        assert_eq!(encode_length(MainType::Meta, 64).unwrap(), 1);
800        assert_eq!(encode_length(MainType::Meta, 96).unwrap(), 2);
801        assert_eq!(encode_length(MainType::Meta, 128).unwrap(), 3);
802        assert_eq!(encode_length(MainType::Meta, 256).unwrap(), 7);
803        assert_eq!(encode_length(MainType::Data, 64).unwrap(), 1);
804        assert_eq!(encode_length(MainType::Instance, 64).unwrap(), 1);
805    }
806
807    #[test]
808    fn test_encode_length_iscc() {
809        // Pass-through for ISCC (0-7)
810        for i in 0..=7 {
811            assert_eq!(encode_length(MainType::Iscc, i).unwrap(), i);
812        }
813        assert!(encode_length(MainType::Iscc, 8).is_err());
814    }
815
816    #[test]
817    fn test_encode_length_id() {
818        // (bit_length - 64) / 8
819        assert_eq!(encode_length(MainType::Id, 64).unwrap(), 0);
820        assert_eq!(encode_length(MainType::Id, 72).unwrap(), 1);
821        assert_eq!(encode_length(MainType::Id, 80).unwrap(), 2);
822        assert_eq!(encode_length(MainType::Id, 96).unwrap(), 4);
823    }
824
825    #[test]
826    fn test_encode_length_invalid() {
827        // Not a multiple of 32
828        assert!(encode_length(MainType::Meta, 48).is_err());
829        // Too small
830        assert!(encode_length(MainType::Meta, 0).is_err());
831        // ID out of range
832        assert!(encode_length(MainType::Id, 63).is_err());
833        assert!(encode_length(MainType::Id, 97).is_err());
834    }
835
836    #[test]
837    fn test_decode_length_standard_types() {
838        // (length + 1) * 32
839        assert_eq!(decode_length(MainType::Meta, 0, SubType::None), 32);
840        assert_eq!(decode_length(MainType::Meta, 1, SubType::None), 64);
841        assert_eq!(decode_length(MainType::Meta, 7, SubType::None), 256);
842        assert_eq!(decode_length(MainType::Data, 1, SubType::None), 64);
843    }
844
845    #[test]
846    fn test_decode_length_iscc() {
847        // Wide → 256
848        assert_eq!(decode_length(MainType::Iscc, 0, SubType::Wide), 256);
849        // Non-wide → popcount(length) * 64 + 128
850        assert_eq!(decode_length(MainType::Iscc, 0, SubType::Sum), 128); // 0 optional units
851        assert_eq!(decode_length(MainType::Iscc, 1, SubType::None), 192); // 1 optional unit
852        assert_eq!(decode_length(MainType::Iscc, 3, SubType::None), 256); // 2 optional units
853        assert_eq!(decode_length(MainType::Iscc, 7, SubType::None), 320); // 3 optional units
854    }
855
856    #[test]
857    fn test_decode_length_id() {
858        // length * 8 + 64
859        assert_eq!(decode_length(MainType::Id, 0, SubType::None), 64);
860        assert_eq!(decode_length(MainType::Id, 1, SubType::None), 72);
861        assert_eq!(decode_length(MainType::Id, 4, SubType::None), 96);
862    }
863
864    #[test]
865    fn test_encode_decode_length_roundtrip() {
866        for &mtype in &[
867            MainType::Meta,
868            MainType::Data,
869            MainType::Instance,
870            MainType::Content,
871        ] {
872            for bit_length in (32..=256).step_by(32) {
873                let encoded = encode_length(mtype, bit_length).unwrap();
874                let decoded = decode_length(mtype, encoded, SubType::None);
875                assert_eq!(
876                    decoded, bit_length,
877                    "roundtrip failed for {mtype:?} bit_length={bit_length}"
878                );
879            }
880        }
881    }
882
883    // ---- Base32 tests ----
884
885    #[test]
886    fn test_base32_roundtrip() {
887        let test_data: &[&[u8]] = &[
888            &[0x00],
889            &[0xFF],
890            &[0x00, 0x01, 0x02, 0x03],
891            &[0xDE, 0xAD, 0xBE, 0xEF, 0xCA, 0xFE],
892            &[0; 10],
893            &[0xFF; 10],
894        ];
895
896        for data in test_data {
897            let encoded = encode_base32(data);
898            let decoded = decode_base32(&encoded).unwrap();
899            assert_eq!(&decoded, data, "base32 roundtrip failed for {data:?}");
900        }
901    }
902
903    #[test]
904    fn test_base32_no_padding() {
905        let encoded = encode_base32(&[0x00, 0x01]);
906        assert!(!encoded.contains('='), "base32 should not contain padding");
907    }
908
909    #[test]
910    fn test_base32_case_insensitive_decode() {
911        let data = vec![0xDE, 0xAD, 0xBE, 0xEF];
912        let encoded = encode_base32(&data);
913        let lower = encoded.to_lowercase();
914        let decoded = decode_base32(&lower).unwrap();
915        assert_eq!(decoded, data);
916    }
917
918    // ---- Base64 encoding tests ----
919
920    #[test]
921    fn test_encode_base64_empty() {
922        assert_eq!(encode_base64(&[]), "");
923    }
924
925    #[test]
926    fn test_encode_base64_known_value() {
927        // Python: base64.urlsafe_b64encode(bytes([0,1,2,3])).decode().rstrip("=") == "AAECAw"
928        assert_eq!(encode_base64(&[0, 1, 2, 3]), "AAECAw");
929    }
930
931    #[test]
932    fn test_encode_base64_roundtrip() {
933        let data: &[&[u8]] = &[
934            &[0xFF],
935            &[0xDE, 0xAD, 0xBE, 0xEF],
936            &[0; 10],
937            &[0xFF; 10],
938            b"Hello World",
939        ];
940        for input in data {
941            let encoded = encode_base64(input);
942            let decoded = data_encoding::BASE64URL_NOPAD
943                .decode(encoded.as_bytes())
944                .unwrap();
945            assert_eq!(&decoded, input, "base64 roundtrip failed for {input:?}");
946        }
947    }
948
949    #[test]
950    fn test_encode_base64_no_padding() {
951        // Various lengths that would normally produce padding
952        for len in 1..=10 {
953            let data = vec![0xABu8; len];
954            let encoded = encode_base64(&data);
955            assert!(
956                !encoded.contains('='),
957                "base64 output must not contain padding for len={len}"
958            );
959        }
960    }
961
962    // ---- encode_component tests ----
963
964    #[test]
965    fn test_encode_component_meta_known_vector() {
966        // gen_meta_code_v0("Hello World") → "ISCC:AAAWKLHFPV6OPKDG"
967        // Decode the known output to extract the digest, then re-encode
968        let known_code = "AAAWKLHFPV6OPKDG";
969        let raw = decode_base32(known_code).unwrap();
970        assert_eq!(raw.len(), 10); // 2 header bytes + 8 digest bytes
971
972        // Verify header decodes correctly
973        let (mtype, stype, version, length, tail) = decode_header(&raw).unwrap();
974        assert_eq!(mtype, MainType::Meta);
975        assert_eq!(stype, SubType::None);
976        assert_eq!(version, Version::V0);
977        assert_eq!(length, 1); // encode_length(META, 64) = 1
978        assert_eq!(tail.len(), 8); // 64-bit digest
979
980        // Re-encode from extracted digest
981        let result =
982            encode_component(MainType::Meta, SubType::None, Version::V0, 64, &tail).unwrap();
983        assert_eq!(result, known_code);
984    }
985
986    #[test]
987    fn test_encode_component_rejects_iscc_maintype() {
988        assert!(
989            encode_component(MainType::Iscc, SubType::Sum, Version::V0, 128, &[0; 16],).is_err()
990        );
991    }
992
993    #[test]
994    fn test_encode_component_data_type() {
995        // Encode a Data-Code component and verify roundtrip
996        let digest = [0xAA; 32];
997        let code =
998            encode_component(MainType::Data, SubType::None, Version::V0, 64, &digest).unwrap();
999
1000        // Decode and verify
1001        let raw = decode_base32(&code).unwrap();
1002        let (mtype, stype, version, length, tail) = decode_header(&raw).unwrap();
1003        assert_eq!(mtype, MainType::Data);
1004        assert_eq!(stype, SubType::None);
1005        assert_eq!(version, Version::V0);
1006        assert_eq!(length, 1); // encode_length(DATA, 64) = 1
1007        assert_eq!(tail, &digest[..8]); // 64 bits = 8 bytes
1008    }
1009
1010    #[test]
1011    fn test_encode_component_content_image() {
1012        let digest = [0x55; 16];
1013        let code =
1014            encode_component(MainType::Content, SubType::Image, Version::V0, 128, &digest).unwrap();
1015
1016        let raw = decode_base32(&code).unwrap();
1017        let (mtype, stype, _version, length, tail) = decode_header(&raw).unwrap();
1018        assert_eq!(mtype, MainType::Content);
1019        assert_eq!(stype, SubType::Image);
1020        assert_eq!(length, 3); // encode_length(CONTENT, 128) = 3
1021        assert_eq!(tail, &digest[..]); // 128 bits = 16 bytes
1022    }
1023
1024    // ---- TryFrom tests ----
1025
1026    #[test]
1027    fn test_maintype_try_from() {
1028        for v in 0..=7u8 {
1029            assert!(MainType::try_from(v).is_ok());
1030        }
1031        assert!(MainType::try_from(8).is_err());
1032    }
1033
1034    #[test]
1035    fn test_subtype_try_from() {
1036        for v in 0..=7u8 {
1037            assert!(SubType::try_from(v).is_ok());
1038        }
1039        assert!(SubType::try_from(8).is_err());
1040    }
1041
1042    #[test]
1043    fn test_version_try_from() {
1044        assert!(Version::try_from(0).is_ok());
1045        assert!(Version::try_from(1).is_err());
1046    }
1047
1048    #[test]
1049    fn test_subtype_text_alias() {
1050        assert_eq!(SubType::TEXT, SubType::None);
1051        assert_eq!(SubType::TEXT as u8, 0);
1052    }
1053
1054    // ---- Bit helper tests ----
1055
1056    #[test]
1057    fn test_bits_to_u32() {
1058        assert_eq!(bits_to_u32(&[false, false, false, false]), 0);
1059        assert_eq!(bits_to_u32(&[false, true, true, true]), 7);
1060        assert_eq!(bits_to_u32(&[true, false, false, false]), 8);
1061        assert_eq!(bits_to_u32(&[true, true, true, true]), 15);
1062    }
1063
1064    #[test]
1065    fn test_bytes_bits_roundtrip() {
1066        let data = vec![0x00, 0x01, 0xFF, 0xAB];
1067        let bits = bytes_to_bits(&data);
1068        assert_eq!(bits.len(), 32);
1069        let bytes = bits_to_bytes(&bits);
1070        assert_eq!(bytes, data);
1071    }
1072
1073    // ---- encode_units tests ----
1074
1075    #[test]
1076    fn test_encode_units_empty() {
1077        assert_eq!(encode_units(&[]).unwrap(), 0);
1078    }
1079
1080    #[test]
1081    fn test_encode_units_content_only() {
1082        assert_eq!(encode_units(&[MainType::Content]).unwrap(), 1);
1083    }
1084
1085    #[test]
1086    fn test_encode_units_semantic_only() {
1087        assert_eq!(encode_units(&[MainType::Semantic]).unwrap(), 2);
1088    }
1089
1090    #[test]
1091    fn test_encode_units_semantic_content() {
1092        assert_eq!(
1093            encode_units(&[MainType::Semantic, MainType::Content]).unwrap(),
1094            3
1095        );
1096    }
1097
1098    #[test]
1099    fn test_encode_units_meta_only() {
1100        assert_eq!(encode_units(&[MainType::Meta]).unwrap(), 4);
1101    }
1102
1103    #[test]
1104    fn test_encode_units_meta_content() {
1105        assert_eq!(
1106            encode_units(&[MainType::Meta, MainType::Content]).unwrap(),
1107            5
1108        );
1109    }
1110
1111    #[test]
1112    fn test_encode_units_meta_semantic() {
1113        assert_eq!(
1114            encode_units(&[MainType::Meta, MainType::Semantic]).unwrap(),
1115            6
1116        );
1117    }
1118
1119    #[test]
1120    fn test_encode_units_all_optional() {
1121        assert_eq!(
1122            encode_units(&[MainType::Meta, MainType::Semantic, MainType::Content]).unwrap(),
1123            7
1124        );
1125    }
1126
1127    #[test]
1128    fn test_encode_units_rejects_data() {
1129        assert!(encode_units(&[MainType::Data]).is_err());
1130    }
1131
1132    #[test]
1133    fn test_encode_units_rejects_instance() {
1134        assert!(encode_units(&[MainType::Instance]).is_err());
1135    }
1136
1137    #[test]
1138    fn test_encode_units_rejects_iscc() {
1139        assert!(encode_units(&[MainType::Iscc]).is_err());
1140    }
1141
1142    // ---- decode_units tests ----
1143
1144    #[test]
1145    fn test_decode_units_empty() {
1146        assert_eq!(decode_units(0).unwrap(), vec![]);
1147    }
1148
1149    #[test]
1150    fn test_decode_units_content() {
1151        assert_eq!(decode_units(1).unwrap(), vec![MainType::Content]);
1152    }
1153
1154    #[test]
1155    fn test_decode_units_semantic() {
1156        assert_eq!(decode_units(2).unwrap(), vec![MainType::Semantic]);
1157    }
1158
1159    #[test]
1160    fn test_decode_units_semantic_content() {
1161        assert_eq!(
1162            decode_units(3).unwrap(),
1163            vec![MainType::Semantic, MainType::Content]
1164        );
1165    }
1166
1167    #[test]
1168    fn test_decode_units_meta() {
1169        assert_eq!(decode_units(4).unwrap(), vec![MainType::Meta]);
1170    }
1171
1172    #[test]
1173    fn test_decode_units_meta_content() {
1174        assert_eq!(
1175            decode_units(5).unwrap(),
1176            vec![MainType::Meta, MainType::Content]
1177        );
1178    }
1179
1180    #[test]
1181    fn test_decode_units_meta_semantic() {
1182        assert_eq!(
1183            decode_units(6).unwrap(),
1184            vec![MainType::Meta, MainType::Semantic]
1185        );
1186    }
1187
1188    #[test]
1189    fn test_decode_units_all() {
1190        assert_eq!(
1191            decode_units(7).unwrap(),
1192            vec![MainType::Meta, MainType::Semantic, MainType::Content]
1193        );
1194    }
1195
1196    #[test]
1197    fn test_decode_units_invalid() {
1198        assert!(decode_units(8).is_err());
1199        assert!(decode_units(255).is_err());
1200    }
1201
1202    #[test]
1203    fn test_decode_units_roundtrip_with_encode_units() {
1204        for unit_id in 0..=7u32 {
1205            let types = decode_units(unit_id).unwrap();
1206            let encoded = encode_units(&types).unwrap();
1207            assert_eq!(encoded, unit_id, "roundtrip failed for unit_id={unit_id}");
1208        }
1209    }
1210
1211    // ---- iscc_decompose tests ----
1212
1213    #[test]
1214    fn test_decompose_single_meta_unit() {
1215        // A single Meta-Code unit passes through unchanged
1216        let result = iscc_decompose("AAAYPXW445FTYNJ3").unwrap();
1217        assert_eq!(result, vec!["AAAYPXW445FTYNJ3"]);
1218    }
1219
1220    #[test]
1221    fn test_decompose_single_unit_with_prefix() {
1222        // Accepts "ISCC:" prefix and returns without prefix
1223        let result = iscc_decompose("ISCC:AAAYPXW445FTYNJ3").unwrap();
1224        assert_eq!(result, vec!["AAAYPXW445FTYNJ3"]);
1225    }
1226
1227    #[test]
1228    fn test_decompose_single_unit_maintype() {
1229        // Verify the decomposed unit decodes to the expected MainType
1230        let result = iscc_decompose("AAAYPXW445FTYNJ3").unwrap();
1231        assert_eq!(result.len(), 1);
1232        let raw = decode_base32(&result[0]).unwrap();
1233        let (mt, _, _, _, _) = decode_header(&raw).unwrap();
1234        assert_eq!(mt, MainType::Meta);
1235    }
1236
1237    #[test]
1238    fn test_decompose_standard_iscc_code() {
1239        // test_0000_standard: Meta + Content(Text) + Data + Instance → composite
1240        let codes = [
1241            "AAAYPXW445FTYNJ3",
1242            "EAARMJLTQCUWAND2",
1243            "GABVVC5DMJJGYKZ4ZBYVNYABFFYXG",
1244            "IADWIK7A7JTUAQ2D6QARX7OBEIK3OOUAM42LOBLCZ4ZOGDLRHMDL6TQ",
1245        ];
1246        let composite = crate::gen_iscc_code_v0(
1247            &codes.iter().map(|s| *s as &str).collect::<Vec<&str>>(),
1248            false,
1249        )
1250        .unwrap();
1251
1252        let decomposed = iscc_decompose(&composite.iscc).unwrap();
1253
1254        // Should produce 4 units: Meta, Content, Data, Instance
1255        assert_eq!(decomposed.len(), 4);
1256
1257        // Verify MainTypes in order
1258        let main_types: Vec<MainType> = decomposed
1259            .iter()
1260            .map(|code| {
1261                let raw = decode_base32(code).unwrap();
1262                let (mt, _, _, _, _) = decode_header(&raw).unwrap();
1263                mt
1264            })
1265            .collect();
1266        assert_eq!(
1267            main_types,
1268            vec![
1269                MainType::Meta,
1270                MainType::Content,
1271                MainType::Data,
1272                MainType::Instance
1273            ]
1274        );
1275
1276        // Data and Instance are always the last two
1277        let raw_data = decode_base32(&decomposed[2]).unwrap();
1278        let (mt_d, _, _, _, _) = decode_header(&raw_data).unwrap();
1279        assert_eq!(mt_d, MainType::Data);
1280
1281        let raw_inst = decode_base32(&decomposed[3]).unwrap();
1282        let (mt_i, _, _, _, _) = decode_header(&raw_inst).unwrap();
1283        assert_eq!(mt_i, MainType::Instance);
1284    }
1285
1286    #[test]
1287    fn test_decompose_no_meta() {
1288        // test_0001_no_meta: Content(Text) + Data + Instance → composite (no Meta)
1289        let codes = [
1290            "EAARMJLTQCUWAND2",
1291            "GABVVC5DMJJGYKZ4ZBYVNYABFFYXG",
1292            "IADWIK7A7JTUAQ2D6QARX7OBEIK3OOUAM42LOBLCZ4ZOGDLRHMDL6TQ",
1293        ];
1294        let composite = crate::gen_iscc_code_v0(
1295            &codes.iter().map(|s| *s as &str).collect::<Vec<&str>>(),
1296            false,
1297        )
1298        .unwrap();
1299
1300        let decomposed = iscc_decompose(&composite.iscc).unwrap();
1301
1302        // Should produce 3 units: Content, Data, Instance (no Meta)
1303        assert_eq!(decomposed.len(), 3);
1304
1305        let main_types: Vec<MainType> = decomposed
1306            .iter()
1307            .map(|code| {
1308                let raw = decode_base32(code).unwrap();
1309                let (mt, _, _, _, _) = decode_header(&raw).unwrap();
1310                mt
1311            })
1312            .collect();
1313        assert_eq!(
1314            main_types,
1315            vec![MainType::Content, MainType::Data, MainType::Instance]
1316        );
1317    }
1318
1319    #[test]
1320    fn test_decompose_sum_only() {
1321        // test_0002: Data + Instance only (Sum SubType)
1322        let codes = [
1323            "GABVVC5DMJJGYKZ4ZBYVNYABFFYXG",
1324            "IADWIK7A7JTUAQ2D6QARX7OBEIK3OOUAM42LOBLCZ4ZOGDLRHMDL6TQ",
1325        ];
1326        let composite = crate::gen_iscc_code_v0(
1327            &codes.iter().map(|s| *s as &str).collect::<Vec<&str>>(),
1328            false,
1329        )
1330        .unwrap();
1331
1332        let decomposed = iscc_decompose(&composite.iscc).unwrap();
1333
1334        // Should produce 2 units: Data, Instance
1335        assert_eq!(decomposed.len(), 2);
1336
1337        let main_types: Vec<MainType> = decomposed
1338            .iter()
1339            .map(|code| {
1340                let raw = decode_base32(code).unwrap();
1341                let (mt, _, _, _, _) = decode_header(&raw).unwrap();
1342                mt
1343            })
1344            .collect();
1345        assert_eq!(main_types, vec![MainType::Data, MainType::Instance]);
1346    }
1347
1348    #[test]
1349    fn test_decompose_conformance_roundtrip() {
1350        // Use gen_iscc_code_v0 conformance vectors to verify decompose
1351        let json_str = include_str!("../tests/data.json");
1352        let data: serde_json::Value = serde_json::from_str(json_str).unwrap();
1353        let section = &data["gen_iscc_code_v0"];
1354        let cases = section.as_object().unwrap();
1355
1356        for (tc_name, tc) in cases {
1357            let expected_iscc = tc["outputs"]["iscc"].as_str().unwrap();
1358            let inputs = tc["inputs"].as_array().unwrap();
1359            let codes_json = inputs[0].as_array().unwrap();
1360            let input_codes: Vec<&str> = codes_json.iter().map(|v| v.as_str().unwrap()).collect();
1361
1362            let decomposed = iscc_decompose(expected_iscc).unwrap();
1363
1364            // Each decomposed code decodes to a valid MainType
1365            for code in &decomposed {
1366                let raw = decode_base32(code).unwrap();
1367                let (mt, _, _, _, _) = decode_header(&raw).unwrap();
1368                assert_ne!(
1369                    mt,
1370                    MainType::Iscc,
1371                    "decomposed unit should not be ISCC in {tc_name}"
1372                );
1373            }
1374
1375            // Data and Instance are always the last two units
1376            let last_two: Vec<MainType> = decomposed[decomposed.len() - 2..]
1377                .iter()
1378                .map(|code| {
1379                    let raw = decode_base32(code).unwrap();
1380                    let (mt, _, _, _, _) = decode_header(&raw).unwrap();
1381                    mt
1382                })
1383                .collect();
1384            assert_eq!(
1385                last_two,
1386                vec![MainType::Data, MainType::Instance],
1387                "last two units must be Data+Instance in {tc_name}"
1388            );
1389
1390            // Number of decomposed units matches number of input codes
1391            assert_eq!(
1392                decomposed.len(),
1393                input_codes.len(),
1394                "decomposed unit count mismatch in {tc_name}"
1395            );
1396        }
1397    }
1398
1399    // ---- iscc_decompose truncation tests ----
1400
1401    /// Build a truncated ISCC string: valid header for given params, but fewer body bytes than needed.
1402    ///
1403    /// For ISCC MainType, `length_field` is the raw unit_id (0-7).
1404    /// For other MainTypes, `length_field` is the raw header length field value.
1405    fn make_truncated_iscc(
1406        mtype: MainType,
1407        stype: SubType,
1408        length_field: u32,
1409        body_len: usize,
1410    ) -> String {
1411        let header = encode_header(mtype, stype, Version::V0, length_field).unwrap();
1412        let mut raw = header;
1413        raw.extend(vec![0xABu8; body_len]);
1414        encode_base32(&raw)
1415    }
1416
1417    #[test]
1418    fn test_decompose_truncated_standard_unit() {
1419        // Meta-Code header for 64 bits (8 bytes expected), but only 4 body bytes provided.
1420        // encode_length(Meta, 64) = 64/32 - 1 = 1
1421        let length_field = encode_length(MainType::Meta, 64).unwrap();
1422        let iscc = make_truncated_iscc(MainType::Meta, SubType::None, length_field, 4);
1423        let result = iscc_decompose(&iscc);
1424        assert!(
1425            result.is_err(),
1426            "expected error for truncated standard unit"
1427        );
1428        let err = result.unwrap_err().to_string();
1429        assert!(
1430            err.contains("truncated ISCC body"),
1431            "error should mention truncation: {err}"
1432        );
1433    }
1434
1435    #[test]
1436    fn test_decompose_truncated_wide_mode() {
1437        // ISCC-CODE Wide header expects 32 body bytes, provide only 16.
1438        // For Wide ISCC-CODE, length field is unit_id (0 = no optional units)
1439        let iscc = make_truncated_iscc(MainType::Iscc, SubType::Wide, 0, 16);
1440        let result = iscc_decompose(&iscc);
1441        assert!(result.is_err(), "expected error for truncated wide mode");
1442        let err = result.unwrap_err().to_string();
1443        assert!(
1444            err.contains("truncated ISCC body"),
1445            "error should mention truncation: {err}"
1446        );
1447    }
1448
1449    #[test]
1450    fn test_decompose_truncated_dynamic_units() {
1451        // ISCC-CODE with Meta+Content (unit_id=5, bit0=Content+bit2=Meta)
1452        // Dynamic units: 2 × 8 = 16 bytes, static: 16 bytes, total: 32 bytes needed
1453        // Provide only 8 body bytes (enough for 1 dynamic unit, not all)
1454        let unit_id = 5; // Meta + Content
1455        let iscc = make_truncated_iscc(MainType::Iscc, SubType::None, unit_id, 8);
1456        let result = iscc_decompose(&iscc);
1457        assert!(
1458            result.is_err(),
1459            "expected error for truncated dynamic units"
1460        );
1461        let err = result.unwrap_err().to_string();
1462        assert!(
1463            err.contains("truncated ISCC body"),
1464            "error should mention truncation: {err}"
1465        );
1466    }
1467
1468    #[test]
1469    fn test_decompose_truncated_static_units() {
1470        // ISCC-CODE with Content only (unit_id=1)
1471        // Dynamic: 1 × 8 = 8, static: 16, total: 24 bytes needed
1472        // Provide only 16 body bytes (dynamic ok, but static Data+Instance missing)
1473        let unit_id = 1; // Content only
1474        let iscc = make_truncated_iscc(MainType::Iscc, SubType::None, unit_id, 16);
1475        let result = iscc_decompose(&iscc);
1476        assert!(result.is_err(), "expected error for truncated static units");
1477        let err = result.unwrap_err().to_string();
1478        assert!(
1479            err.contains("truncated ISCC body"),
1480            "error should mention truncation: {err}"
1481        );
1482    }
1483
1484    #[test]
1485    fn test_decompose_empty_body() {
1486        // Meta-Code header for 64 bits but zero body bytes
1487        let length_field = encode_length(MainType::Meta, 64).unwrap();
1488        let iscc = make_truncated_iscc(MainType::Meta, SubType::None, length_field, 0);
1489        let result = iscc_decompose(&iscc);
1490        assert!(result.is_err(), "expected error for empty body");
1491        let err = result.unwrap_err().to_string();
1492        assert!(
1493            err.contains("truncated ISCC body"),
1494            "error should mention truncation: {err}"
1495        );
1496    }
1497
1498    #[test]
1499    fn test_decompose_valid_still_works() {
1500        // A valid ISCC-CODE should still decompose correctly (regression guard)
1501        // Build: Meta(64) + Content-Text(64) + Data(64) + Instance(64)
1502        let meta_body = [0x11u8; 8];
1503        let content_body = [0x22u8; 8];
1504        let data_body = [0x33u8; 8];
1505        let instance_body = [0x44u8; 8];
1506
1507        let meta_code =
1508            encode_component(MainType::Meta, SubType::None, Version::V0, 64, &meta_body).unwrap();
1509        let content_code = encode_component(
1510            MainType::Content,
1511            SubType::None,
1512            Version::V0,
1513            64,
1514            &content_body,
1515        )
1516        .unwrap();
1517        let data_code =
1518            encode_component(MainType::Data, SubType::None, Version::V0, 64, &data_body).unwrap();
1519        let instance_code = encode_component(
1520            MainType::Instance,
1521            SubType::None,
1522            Version::V0,
1523            64,
1524            &instance_body,
1525        )
1526        .unwrap();
1527
1528        // Concatenate as a sequence of ISCC-UNITs (not a single ISCC-CODE)
1529        let sequence = format!("{meta_code}{content_code}{data_code}{instance_code}");
1530        let raw = decode_base32(&sequence).unwrap();
1531        let full_iscc = encode_base32(&raw);
1532
1533        let result = iscc_decompose(&full_iscc);
1534        assert!(
1535            result.is_ok(),
1536            "valid ISCC sequence should decompose: {result:?}"
1537        );
1538        let units = result.unwrap();
1539        assert_eq!(units.len(), 4, "should decompose into 4 units");
1540    }
1541}