// base-d 3.0.34 - Docs.rs

use crate::{Dictionary, DictionaryRegistry, EncodingMode, decode, encode};

/// Builds the [`Dictionary`] registered under `name` in the default registry.
///
/// Byte-range dictionaries are constructed from their start codepoint alone.
/// Every other mode is constructed from the configured character set, plus the
/// first character of the padding string when one is configured.
///
/// # Panics
///
/// Panics if the default registry cannot be loaded, the lookup for `name`
/// fails, a required configuration value is absent, or the builder rejects
/// the configuration.
fn get_dictionary(name: &str) -> Dictionary {
    let registry = DictionaryRegistry::load_default().unwrap();
    let entry = registry.get_dictionary(name).unwrap();
    let mode = entry.effective_mode();

    // Byte-range dictionaries need no explicit alphabet, so handle them first.
    if matches!(mode, EncodingMode::ByteRange) {
        let first_codepoint = entry.start_codepoint.unwrap();
        return Dictionary::builder()
            .mode(mode)
            .start_codepoint(first_codepoint)
            .build()
            .unwrap();
    }

    let alphabet: Vec<char> = entry.effective_chars().unwrap().chars().collect();
    // Only the first character of the configured padding string is used.
    let pad_char = entry.padding.as_ref().and_then(|s| s.chars().next());

    let unpadded = Dictionary::builder().chars(alphabet).mode(mode);
    let builder = match pad_char {
        Some(p) => unpadded.padding(p),
        None => unpadded,
    };
    builder.build().unwrap()
}

/// Encoding an empty byte string with the "cards" dictionary yields an empty
/// string. Only the encode direction is checked here.
#[test]
fn test_encode_decode_empty() {
    let cards = get_dictionary("cards");
    let text = encode(b"", &cards);
    assert_eq!(text, "");
}

/// A single zero byte encodes to exactly one character with the "cards"
/// dictionary and decodes back to the same byte.
#[test]
fn test_encode_decode_zero() {
    let cards = get_dictionary("cards");
    let input = &[0u8];

    let text = encode(input, &cards);
    assert_eq!(text.chars().count(), 1);

    let restored = decode(&text, &cards).unwrap();
    assert_eq!(restored, input);
}

/// Round-trips the short ASCII string "Hello" through the "cards" dictionary.
#[test]
fn test_encode_decode_simple() {
    let cards = get_dictionary("cards");
    let input = b"Hello";

    let text = encode(input, &cards);
    let restored = decode(&text, &cards).unwrap();

    assert_eq!(restored, input);
}

/// Round-trips "Hello, World!" through the "cards" dictionary, printing the
/// encoded form for inspection.
#[test]
fn test_encode_decode_hello_world() {
    let cards = get_dictionary("cards");
    let input = b"Hello, World!";

    let text = encode(input, &cards);
    println!("Encoded: {}", text);

    let restored = decode(&text, &cards).unwrap();
    assert_eq!(restored, input);
}

/// Round-trips non-text bytes (low and high values) through the "cards"
/// dictionary.
#[test]
fn test_encode_decode_binary() {
    let cards = get_dictionary("cards");
    let input = &[0u8, 1, 2, 3, 255, 254, 253];

    let text = encode(input, &cards);
    let restored = decode(&text, &cards).unwrap();

    assert_eq!(restored, input);
}

/// Input that starts with several zero bytes must survive a round trip through
/// the "cards" dictionary with those zeros intact.
#[test]
fn test_encode_decode_leading_zeros() {
    let cards = get_dictionary("cards");
    let input = &[0u8, 0, 0, 1, 2, 3];

    let text = encode(input, &cards);
    let restored = decode(&text, &cards).unwrap();

    assert_eq!(restored, input);
}

/// Decoding the text "ABC" with the "cards" dictionary must report an error.
#[test]
fn test_decode_invalid_character() {
    let cards = get_dictionary("cards");

    let result = decode("ABC", &cards);

    assert!(result.is_err());
}

/// The "cards" dictionary reports a base of 52.
#[test]
fn test_dictionary_base() {
    let cards = get_dictionary("cards");
    let base = cards.base();
    assert_eq!(base, 52);
}

/// The "base64" dictionary runs in chunked mode and produces the standard
/// padded base64 text for "Hello, World!", which then decodes back.
#[test]
fn test_base64_chunked_mode() {
    let base64 = get_dictionary("base64");
    assert_eq!(base64.mode(), &EncodingMode::Chunked);

    let input = b"Hello, World!";
    let text = encode(input, &base64);
    println!("base64 encoded: {}", text);

    // Expected value is the standard base64 encoding of the input.
    assert_eq!(text, "SGVsbG8sIFdvcmxkIQ==");

    let restored = decode(&text, &base64).unwrap();
    assert_eq!(restored, input);
}

/// The "base64_radix" dictionary runs in radix mode: its output for
/// "Hello, World!" differs from standard base64 but still round-trips.
#[test]
fn test_base64_radix_mode() {
    let radix64 = get_dictionary("base64_radix");
    assert_eq!(radix64.mode(), &EncodingMode::Radix);

    let input = b"Hello, World!";
    let text = encode(input, &radix64);
    println!("base64_radix encoded: {}", text);

    // Radix conversion must not coincide with the chunked (standard) encoding.
    assert_ne!(text, "SGVsbG8sIFdvcmxkIQ==");

    let restored = decode(&text, &radix64).unwrap();
    assert_eq!(restored, input);
}

/// The "base100" dictionary is a byte-range dictionary with base 256: each
/// input byte becomes one character whose codepoint is offset from 127991.
#[test]
fn test_base100_byte_range_mode() {
    let base100 = get_dictionary("base100");
    assert_eq!(base100.mode(), &EncodingMode::ByteRange);
    assert_eq!(base100.base(), 256);

    let input = b"Hello, World!";
    let text = encode(input, &base100);
    println!("base100 encoded: {}", text);

    // One output character per input byte.
    assert_eq!(text.chars().count(), input.len());

    // 'H' is byte 72, so the first character must be 127991 + 72 = 128063 (U+1F43F).
    let leading = text.chars().next().unwrap();
    assert_eq!(u32::from(leading), 127991 + 72);

    let restored = decode(&text, &base100).unwrap();
    assert_eq!(restored, input);
}

/// Every possible byte value encodes to one character with "base100" and the
/// full 256-byte sequence round-trips.
#[test]
fn test_base100_all_bytes() {
    let base100 = get_dictionary("base100");

    // 0, 1, ..., 255 in order.
    let every_byte: Vec<u8> = (0..=u8::MAX).collect();
    let text = encode(&every_byte, &base100);

    assert_eq!(text.chars().count(), 256);

    let restored = decode(&text, &base100).unwrap();
    assert_eq!(restored, every_byte);
}

/// Empty input encodes to an empty string with "base100", and that empty
/// string decodes back to empty bytes.
#[test]
fn test_base100_empty() {
    let base100 = get_dictionary("base100");
    let input = b"";

    let text = encode(input, &base100);
    assert_eq!(text, "");

    let restored = decode(&text, &base100).unwrap();
    assert_eq!(restored, input);
}

/// Round-trips a mix of low, high and mid-range byte values through "base100".
#[test]
fn test_base100_binary_data() {
    let base100 = get_dictionary("base100");
    let input = &[0u8, 1, 2, 3, 255, 254, 253, 128, 127];

    let text = encode(input, &base100);
    let restored = decode(&text, &base100).unwrap();

    assert_eq!(restored, input);
}

/// The 1024-character "base1024" dictionary loads, reports base 1024,
/// round-trips inputs of several sizes, and stays within the expected
/// output-length bound of roughly one character per 10 input bits.
#[test]
fn test_base1024_large_dictionary() {
    let base1024 = get_dictionary("base1024");
    assert_eq!(base1024.base(), 1024);

    let samples = vec![
        b"A".to_vec(),
        b"Hello".to_vec(),
        b"Hello, World!".to_vec(),
        (0u8..=255).collect::<Vec<u8>>(), // every byte value
    ];

    for sample in samples {
        let text = encode(&sample, &base1024);
        let restored = decode(&text, &base1024).unwrap();
        assert_eq!(restored, sample, "Failed for data of length {}", sample.len());

        // log2(1024) = 10, so each output character carries about 10 bits.
        // Allow one extra character of slack on top of ceil(bits / 10).
        let char_count = text.chars().count();
        let limit = (sample.len() * 8).div_ceil(10) + 1;
        assert!(
            char_count <= limit,
            "Encoding too long: {} chars for {} bytes (expected <= {})",
            char_count,
            sample.len(),
            limit
        );
    }
}

/// Round-trips a sentence through "base1024".
///
/// Per the test name, this is meant to exercise the decoder's HashMap-based
/// lookup for non-ASCII alphabets; the lookup strategy itself is not visible
/// from this file, so only the round trip is asserted.
#[test]
fn test_base1024_uses_hashmap() {
    let base1024 = get_dictionary("base1024");
    let input = b"Testing large dictionary HashMap fallback";

    let text = encode(input, &base1024);
    let restored = decode(&text, &base1024).unwrap();

    assert_eq!(restored, input);
}

/// For the same input, "base1024" must produce strictly fewer characters than
/// "base64" (about 10 bits per character versus 6).
#[test]
fn test_base1024_efficiency() {
    let base1024 = get_dictionary("base1024");
    let base64 = get_dictionary("base64");
    let input = b"The quick brown fox jumps over the lazy dog";

    let len_1024 = encode(input, &base1024).chars().count();
    let len_64 = encode(input, &base64).chars().count();

    assert!(
        len_1024 < len_64,
        "Base1024 ({} chars) should be shorter than base64 ({} chars)",
        len_1024,
        len_64
    );
}

/// The "base256_matrix" alphabet must encode identically whether the
/// registry-configured dictionary or a hand-built radix-mode dictionary is
/// used, and must map one byte to one character.
#[test]
fn test_base256_matrix_like_hex() {
    // Dictionary exactly as configured in the registry.
    let configured = get_dictionary("base256_matrix");
    assert_eq!(configured.base(), 256);

    // Same alphabet, but with radix mode forced explicitly.
    let registry = DictionaryRegistry::load_default().unwrap();
    let alphabet: Vec<char> = registry
        .get_dictionary("base256_matrix")
        .unwrap()
        .effective_chars()
        .unwrap()
        .chars()
        .collect();
    let radix = Dictionary::builder()
        .chars(alphabet)
        .mode(EncodingMode::Radix)
        .build()
        .unwrap();

    let samples = vec![
        b"A".to_vec(),
        b"Hi".to_vec(),
        b"Matrix".to_vec(),
        b"The Matrix has you...".to_vec(),
        (0u8..=255).collect::<Vec<u8>>(), // every byte value
    ];

    for sample in samples {
        let via_configured = encode(&sample, &configured);
        let via_radix = encode(&sample, &radix);

        // Both dictionaries must agree character for character.
        assert_eq!(
            via_configured,
            via_radix,
            "Modes should produce identical output for {} bytes (like hex!)",
            sample.len()
        );

        let restored = decode(&via_configured, &configured).unwrap();
        assert_eq!(restored, sample);

        // 256 = 2^8, so one character per byte.
        assert_eq!(
            via_configured.chars().count(),
            sample.len(),
            "Base256 should have 1:1 char-to-byte ratio"
        );
    }
}

/// With a 256-symbol alphabet, 8 bits fit exactly in one symbol, so the
/// encoded text has as many characters as the input has bytes.
#[test]
fn test_base256_matrix_perfect_encoding() {
    let matrix = get_dictionary("base256_matrix");
    let input = b"Follow the white rabbit";

    let text = encode(input, &matrix);

    // No expansion: character count equals byte count.
    assert_eq!(text.chars().count(), input.len());

    let restored = decode(&text, &matrix).unwrap();
    assert_eq!(restored, input);
}

/// All 256 byte values round-trip through "base256_matrix" and encode to
/// exactly 256 characters.
#[test]
fn test_base256_matrix_all_bytes() {
    let matrix = get_dictionary("base256_matrix");
    let every_byte: Vec<u8> = (0..=u8::MAX).collect();

    let text = encode(&every_byte, &matrix);
    let restored = decode(&text, &matrix).unwrap();

    assert_eq!(restored, every_byte);
    assert_eq!(text.chars().count(), 256); // one character per byte
}

// ============================================================================
// RFC 4648 Official Test Vectors
// https://datatracker.ietf.org/doc/html/rfc4648#section-10
// ============================================================================

/// Checks the "base64" dictionary against the RFC 4648 Section 10 vectors.
///
/// Every vector must encode to the expected text; non-empty vectors must also
/// decode back to the original input.
#[test]
fn test_rfc4648_base64_vectors() {
    let base64 = get_dictionary("base64");

    // (plain input, expected encoding) pairs.
    let vectors = [
        (b"".as_slice(), ""),
        (b"f".as_slice(), "Zg=="),
        (b"fo".as_slice(), "Zm8="),
        (b"foo".as_slice(), "Zm9v"),
        (b"foob".as_slice(), "Zm9vYg=="),
        (b"fooba".as_slice(), "Zm9vYmE="),
        (b"foobar".as_slice(), "Zm9vYmFy"),
    ];

    for &(input, expected) in &vectors {
        let encoded = encode(input, &base64);
        assert_eq!(
            encoded,
            expected,
            "Base64 encoding mismatch for {:?}: got {}, expected {}",
            String::from_utf8_lossy(input),
            encoded,
            expected
        );

        // The empty vector is not exercised in the decode direction.
        if expected.is_empty() {
            continue;
        }
        let decoded = decode(&encoded, &base64).unwrap();
        assert_eq!(
            decoded, input,
            "Base64 round-trip failed for {:?}",
            expected
        );
    }
}

/// Checks the "base32" dictionary against the RFC 4648 Section 10 vectors.
///
/// Every vector must encode to the expected text; non-empty vectors must also
/// decode back to the original input.
#[test]
fn test_rfc4648_base32_vectors() {
    let base32 = get_dictionary("base32");

    // (plain input, expected encoding) pairs.
    let vectors = [
        (b"".as_slice(), ""),
        (b"f".as_slice(), "MY======"),
        (b"fo".as_slice(), "MZXQ===="),
        (b"foo".as_slice(), "MZXW6==="),
        (b"foob".as_slice(), "MZXW6YQ="),
        (b"fooba".as_slice(), "MZXW6YTB"),
        (b"foobar".as_slice(), "MZXW6YTBOI======"),
    ];

    for &(input, expected) in &vectors {
        let encoded = encode(input, &base32);
        assert_eq!(
            encoded,
            expected,
            "Base32 encoding mismatch for {:?}: got {}, expected {}",
            String::from_utf8_lossy(input),
            encoded,
            expected
        );

        // The empty vector is not exercised in the decode direction.
        if expected.is_empty() {
            continue;
        }
        let decoded = decode(&encoded, &base32).unwrap();
        assert_eq!(
            decoded, input,
            "Base32 round-trip failed for {:?}",
            expected
        );
    }
}

/// Checks the "base16" dictionary against the RFC 4648 Section 10 vectors
/// (uppercase hex digits).
///
/// Every vector must encode to the expected text; non-empty vectors must also
/// decode back to the original input.
#[test]
fn test_rfc4648_base16_vectors() {
    let base16 = get_dictionary("base16");

    // (plain input, expected encoding) pairs.
    let vectors = [
        (b"".as_slice(), ""),
        (b"f".as_slice(), "66"),
        (b"fo".as_slice(), "666F"),
        (b"foo".as_slice(), "666F6F"),
        (b"foob".as_slice(), "666F6F62"),
        (b"fooba".as_slice(), "666F6F6261"),
        (b"foobar".as_slice(), "666F6F626172"),
    ];

    for &(input, expected) in &vectors {
        let encoded = encode(input, &base16);
        assert_eq!(
            encoded,
            expected,
            "Base16 encoding mismatch for {:?}: got {}, expected {}",
            String::from_utf8_lossy(input),
            encoded,
            expected
        );

        // The empty vector is not exercised in the decode direction.
        if expected.is_empty() {
            continue;
        }
        let decoded = decode(&encoded, &base16).unwrap();
        assert_eq!(
            decoded, input,
            "Base16 round-trip failed for {:?}",
            expected
        );
    }
}

/// Checks the "base32hex" dictionary against the RFC 4648 Section 10 vectors
/// for the Extended Hex alphabet (0-9A-V instead of A-Z2-7).
///
/// Every vector must encode to the expected text; non-empty vectors must also
/// decode back to the original input.
#[test]
fn test_rfc4648_base32hex_vectors() {
    let base32hex = get_dictionary("base32hex");

    // (plain input, expected encoding) pairs.
    let vectors = [
        (b"".as_slice(), ""),
        (b"f".as_slice(), "CO======"),
        (b"fo".as_slice(), "CPNG===="),
        (b"foo".as_slice(), "CPNMU==="),
        (b"foob".as_slice(), "CPNMUOG="),
        (b"fooba".as_slice(), "CPNMUOJ1"),
        (b"foobar".as_slice(), "CPNMUOJ1E8======"),
    ];

    for &(input, expected) in &vectors {
        let encoded = encode(input, &base32hex);
        assert_eq!(
            encoded,
            expected,
            "Base32hex encoding mismatch for {:?}: got {}, expected {}",
            String::from_utf8_lossy(input),
            encoded,
            expected
        );

        // The empty vector is not exercised in the decode direction.
        if expected.is_empty() {
            continue;
        }
        let decoded = decode(&encoded, &base32hex).unwrap();
        assert_eq!(
            decoded, input,
            "Base32hex round-trip failed for {:?}",
            expected
        );
    }
}

// ============================================================================
// Base58 Test Vectors (IETF Draft & Bitcoin wiki)
// https://datatracker.ietf.org/doc/html/draft-msporny-base58-03
// ============================================================================

/// Checks the "base58" dictionary (Bitcoin alphabet) against known vectors:
/// each input must encode to the expected text and decode back again.
#[test]
fn test_base58_bitcoin_vectors() {
    let base58 = get_dictionary("base58");

    // (plain input, expected encoding) pairs.
    let vectors = [
        (b"Hello World!".as_slice(), "2NEpo7TZRRrLZSi2U"),
        (
            b"The quick brown fox jumps over the lazy dog.".as_slice(),
            "USm3fpXnKG5EUBx2ndxBDMPVciP5hGey2Jh4NDv6gmeo1LkMeiKrLJUUBk6Z",
        ),
        (b"hello world".as_slice(), "StV1DL6CwTryKyV"),
    ];

    for &(input, expected) in &vectors {
        let encoded = encode(input, &base58);
        assert_eq!(
            encoded,
            expected,
            "Base58 encoding mismatch for {:?}: got {}, expected {}",
            String::from_utf8_lossy(input),
            encoded,
            expected
        );

        let decoded = decode(&encoded, &base58).unwrap();
        assert_eq!(
            decoded, input,
            "Base58 round-trip failed for {:?}",
            expected
        );
    }
}

/// Checks the "base58flickr" dictionary, whose alphabet places lowercase
/// letters before uppercase: "Hello World" must encode to "iXf12sRWto45bmC"
/// and decode back again.
#[test]
fn test_base58_flickr_vectors() {
    let flickr = get_dictionary("base58flickr");

    // (plain input, expected encoding) pairs.
    let vectors = [(b"Hello World".as_slice(), "iXf12sRWto45bmC")];

    for &(input, expected) in &vectors {
        let encoded = encode(input, &flickr);
        assert_eq!(
            encoded,
            expected,
            "Base58 Flickr encoding mismatch for {:?}: got {}, expected {}",
            String::from_utf8_lossy(input),
            encoded,
            expected
        );

        let decoded = decode(&encoded, &flickr).unwrap();
        assert_eq!(
            decoded, input,
            "Base58 Flickr round-trip failed for {:?}",
            expected
        );
    }
}

/// Leading zero bytes must appear as leading '1' characters in base58:
/// 0x0000287fb4cd encodes to "11233QC4" and decodes back unchanged.
#[test]
fn test_base58_leading_zeros() {
    let base58 = get_dictionary("base58");

    let input = &[0x00u8, 0x00, 0x28, 0x7f, 0xb4, 0xcd];
    let expected = "11233QC4";

    let encoded = encode(input, &base58);
    assert_eq!(
        encoded, expected,
        "Base58 leading zeros mismatch: got {}, expected {}",
        encoded, expected
    );

    let restored = decode(&encoded, &base58).unwrap();
    assert_eq!(restored, input, "Base58 leading zeros round-trip failed");
}

/// Geohash encoding check, kept as a regression test for a SIMD
/// range-reduction bug.
///
/// Per the original note, the geohash alphabet is a non-contiguous 32-char
/// set that does not fit the 16-byte pshufb LUT, so encoding is expected to
/// fall back to the scalar path. This test only observes the output: exact
/// expected strings, alphabet membership, and a decode round trip.
#[test]
fn test_base32_geohash() {
    // Geohash alphabet (no a, i, l, o).
    let alphabet = "0123456789bcdefghjkmnpqrstuvwxyz";
    let geohash = get_dictionary("base32_geohash");

    // (plain input, expected encoding) pairs.
    let vectors = [
        (b"Hello".as_slice(), "91kqsv3g"),
        (b"World".as_slice(), "bxrr4v34"),
        (b"\x00".as_slice(), "00"),
        (b"\xFF".as_slice(), "zw"),
    ];

    for &(input, expected) in &vectors {
        let encoded = encode(input, &geohash);
        assert_eq!(
            encoded, expected,
            "Geohash encoding mismatch for {:?}: got {}, expected {}",
            input, encoded, expected
        );

        // Every emitted character must belong to the geohash alphabet.
        for symbol in encoded.chars() {
            assert!(
                alphabet.contains(symbol),
                "Invalid geohash character '{}' in output",
                symbol
            );
        }

        // Round trip, skipped for empty input.
        if !input.is_empty() {
            let decoded = decode(&encoded, &geohash).unwrap();
            assert_eq!(decoded, input, "Geohash round-trip failed for {:?}", input);
        }
    }
}