pub mod encoding;
pub mod error;
pub mod full_vector;
pub mod hilbert;
pub mod morton;
pub mod types;
pub mod vector_simhash;
pub use encoding::{decode_sortable, encode_sortable};
pub use error::ElidError;
pub use full_vector::{decode_full_vector, encode_full_vector, FullVectorMetadata};
pub use types::{
DimensionMode, Elid, Embedding, Profile, ProfileInfo, QuantizedCoords, VectorPrecision,
};
pub use vector_simhash::{
cosine_similarity_approx, elid_hamming_distance, embedding_to_bands, mini128_to_bands,
simhash_128, simhash_from_bytes, simhash_to_bytes,
};
pub use hilbert::{hilbert_decode, hilbert_encode};
pub use morton::{morton_decode, morton_encode};
/// Encodes an embedding into an [`Elid`] according to `profile`.
///
/// The embedding is validated by [`Embedding::new`] and, for every profile
/// except `Profile::FullVector`, normalized before encoding. The resulting
/// bytes are then turned into a string with [`encode_sortable`].
///
/// Per profile:
/// - `Mini128`: header followed by the bytes of the 128-bit SimHash.
/// - `Morton10x10` / `Hilbert10x10`: header followed by the low-order
///   `ceil(dims * bits_per_dim / 8)` bytes of the curve code.
/// - `FullVector`: the bytes are produced entirely by [`encode_full_vector`].
///
/// # Errors
///
/// - Any error from [`Embedding::new`], [`QuantizedCoords::from_embedding`],
///   [`encode_full_vector`] or [`Elid::from_string`] is propagated.
/// - [`ElidError::TransformNotFound`] if a Morton/Hilbert profile carries a
///   `transform_id`; no transform is ever applied by this function.
pub fn encode(embedding: &[f32], profile: &Profile) -> Result<Elid, ElidError> {
    let mut emb = Embedding::new(embedding.to_vec())?;
    if !matches!(profile, Profile::FullVector { .. }) {
        emb.normalize();
    }

    // Mini128, Morton and Hilbert all share the same header shape: only the
    // profile type is populated, every optional field is left unset.
    let with_header = |payload: &[u8]| {
        let header = ProfileInfo {
            version: 0,
            profile_type: profile.type_id(),
            transform_id: None,
            model_id: None,
            original_dims: None,
            precision: None,
            dimension_mode: None,
            seed: None,
        };
        let mut framed = header.to_header();
        framed.extend_from_slice(payload);
        framed
    };

    let combined_bytes = match profile {
        Profile::Mini128 { seed } => {
            let hash = simhash_128(emb.as_slice(), *seed);
            with_header(&simhash_to_bytes(hash))
        }
        Profile::Morton10x10 {
            dims,
            bits_per_dim,
            transform_id,
        } => {
            if let Some(id) = transform_id {
                return Err(ElidError::TransformNotFound(*id));
            }
            let quantized = QuantizedCoords::from_embedding(&emb, *dims, *bits_per_dim)?;
            let code = morton_encode(quantized.as_slice(), *bits_per_dim);
            let total_bits = (*dims as usize) * (*bits_per_dim as usize);
            with_header(&code_to_bytes(code, total_bits))
        }
        Profile::Hilbert10x10 {
            dims,
            bits_per_dim,
            transform_id,
        } => {
            if let Some(id) = transform_id {
                return Err(ElidError::TransformNotFound(*id));
            }
            let quantized = QuantizedCoords::from_embedding(&emb, *dims, *bits_per_dim)?;
            let code = hilbert_encode(quantized.as_slice(), *bits_per_dim);
            let total_bits = (*dims as usize) * (*bits_per_dim as usize);
            with_header(&code_to_bytes(code, total_bits))
        }
        Profile::FullVector {
            precision,
            dimensions,
            seed,
        } => {
            // The full-vector encoder returns the complete byte string itself,
            // so no header is prepended here.
            encode_full_vector(emb.as_slice(), *precision, *dimensions, *seed)?
        }
    };

    let encoded = encode_sortable(&combined_bytes);
    Elid::from_string(encoded)
}
/// Decodes an ELID string back into its raw bytes.
///
/// This is a thin wrapper that hands the ELID's string form to
/// [`decode_sortable`]; the bytes are returned as-is, without interpreting
/// the header or payload.
///
/// # Errors
///
/// Propagates whatever error [`decode_sortable`] reports.
pub fn decode(elid: &Elid) -> Result<Vec<u8>, ElidError> {
    let encoded = elid.as_str();
    decode_sortable(encoded)
}
/// Recovers the embedding stored in a full-vector ELID.
///
/// The profile type is read from the low nibble of the first decoded byte;
/// only type `0x04` is handed to [`decode_full_vector`].
///
/// # Errors
///
/// - Any error from [`decode`].
/// - [`ElidError::InvalidHeader`] if fewer than two bytes were decoded.
/// - [`ElidError::DecodingNotSupported`] if the profile type is not `0x04`.
/// - Any error from [`decode_full_vector`].
pub fn decode_to_embedding(elid: &Elid) -> Result<(Vec<f32>, FullVectorMetadata), ElidError> {
    let raw = decode(elid)?;
    match raw.as_slice() {
        // Zero or one byte: too short to hold a header.
        [] | [_] => Err(ElidError::InvalidHeader),
        [first, ..] if (*first & 0x0F) != 0x04 => Err(ElidError::DecodingNotSupported),
        _ => decode_full_vector(&raw),
    }
}
/// Reports whether `elid` carries profile type `0x04`, the only type that
/// [`decode_to_embedding`] accepts.
///
/// Returns `false` when the ELID cannot be converted to bytes or when it
/// yields fewer than two bytes.
pub fn is_reversible(elid: &Elid) -> bool {
    matches!(
        elid.to_bytes(),
        Ok(bytes) if bytes.len() >= 2 && (bytes[0] & 0x0F) == 0x04
    )
}
/// Computes the Hamming distance between the SimHash payloads of two ELIDs.
///
/// Both ELIDs must carry profile type `0x01` (reported as "Mini128" in the
/// error message). The first two decoded bytes are parsed as the header and
/// the remaining bytes as the SimHash payload.
///
/// # Errors
///
/// - Any error from [`decode`].
/// - [`ElidError::InvalidHeader`] if either ELID decodes to fewer than two
///   bytes (previously this case panicked on the header slice).
/// - Any error from [`ProfileInfo::from_header`].
/// - [`ElidError::ProfileMismatch`] if either ELID is not profile type `0x01`;
///   `a` is checked before `b`.
/// - Any error from [`simhash_from_bytes`].
pub fn hamming_distance(a: &Elid, b: &Elid) -> Result<u32, ElidError> {
    let bytes_a = decode(a)?;
    let bytes_b = decode(b)?;

    // Checked slicing: a truncated ELID must surface as an error, not a panic.
    let header_a = bytes_a.get(..2).ok_or(ElidError::InvalidHeader)?;
    let profile_a = ProfileInfo::from_header(header_a)?;
    let header_b = bytes_b.get(..2).ok_or(ElidError::InvalidHeader)?;
    let profile_b = ProfileInfo::from_header(header_b)?;

    for info in [&profile_a, &profile_b] {
        if info.profile_type != 0x01 {
            return Err(ElidError::ProfileMismatch {
                expected: "Mini128".to_string(),
                got: format!("Type {:#x}", info.profile_type),
            });
        }
    }

    // Both buffers are known to hold at least two bytes at this point.
    let hash_a = simhash_from_bytes(&bytes_a[2..])?;
    let hash_b = simhash_from_bytes(&bytes_b[2..])?;
    Ok(elid_hamming_distance(hash_a, hash_b))
}
/// Serializes `code` as big-endian bytes, exactly `ceil(total_bits / 8)` long.
///
/// For widths up to 128 bits the low-order bytes of `code` are returned.
/// For wider requests the value is zero-extended on the left, so the output
/// length always matches the requested width instead of underflowing the
/// start index and panicking.
fn code_to_bytes(code: u128, total_bits: usize) -> Vec<u8> {
    const CODE_BYTES: usize = std::mem::size_of::<u128>();

    let needed_bytes = total_bits.div_ceil(8);
    let all_bytes = code.to_be_bytes();

    // Leading zero bytes are only needed when more than 16 bytes are requested.
    let padding = needed_bytes.saturating_sub(CODE_BYTES);
    let start_idx = CODE_BYTES.saturating_sub(needed_bytes);

    let mut out = Vec::with_capacity(needed_bytes);
    out.resize(padding, 0u8);
    out.extend_from_slice(&all_bytes[start_idx..]);
    out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `dims`-long ramp starting at -1.0 and rising by `2 / dims`
    /// per element.
    fn ramp(dims: usize) -> Vec<f32> {
        let half = dims as f32 / 2.0;
        (0..dims).map(|i| (i as f32 / half) - 1.0).collect()
    }

    /// Repeats the pattern 0.1, 0.2, 0.3, 0.4 until `len` values are produced.
    fn repeating_pattern(len: usize) -> Vec<f32> {
        [0.1f32, 0.2, 0.3, 0.4]
            .iter()
            .copied()
            .cycle()
            .take(len)
            .collect()
    }

    /// Largest absolute element-wise difference between two vectors.
    fn max_abs_error(expected: &[f32], actual: &[f32]) -> f32 {
        expected
            .iter()
            .zip(actual.iter())
            .map(|(a, b)| (a - b).abs())
            .fold(0.0f32, f32::max)
    }

    #[test]
    fn test_encode_basic() {
        let input = vec![0.1; 128];
        let outcome = encode(&input, &Profile::default());
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_encode_deterministic() {
        let input = repeating_pattern(128);
        let profile = Profile::Mini128 {
            seed: 0x454c4944_53494d48,
        };
        let first = encode(&input, &profile).unwrap();
        let second = encode(&input, &profile).unwrap();
        assert_eq!(
            first, second,
            "Same embedding + profile should produce same ELID"
        );
    }

    #[test]
    fn test_encode_validates_dimensions() {
        let profile = Profile::default();
        // One length that is rejected as too short, one as too long.
        for dims in [32usize, 4096] {
            let input = vec![0.1; dims];
            let outcome = encode(&input, &profile);
            assert!(matches!(outcome, Err(ElidError::InvalidDimension { .. })));
        }
    }

    #[test]
    fn test_decode_roundtrip() {
        let input = repeating_pattern(768);
        let elid = encode(&input, &Profile::default()).unwrap();
        let raw = decode(&elid).unwrap();
        assert_eq!(raw.len(), 18);
    }

    #[test]
    fn test_hamming_distance_identical_elids() {
        let input = vec![0.3; 512];
        let elid = encode(&input, &Profile::default()).unwrap();
        let distance = hamming_distance(&elid, &elid).unwrap();
        assert_eq!(distance, 0, "Identical ELIDs should have distance 0");
    }

    #[test]
    fn test_encode_morton() {
        let input = vec![0.1; 128];
        let profile = Profile::Morton10x10 {
            dims: 10,
            bits_per_dim: 10,
            transform_id: None,
        };
        let outcome = encode(&input, &profile);
        assert!(outcome.is_ok(), "Morton encoding should work");
    }

    #[test]
    fn test_encode_hilbert() {
        let input = vec![0.1; 128];
        let profile = Profile::Hilbert10x10 {
            dims: 10,
            bits_per_dim: 10,
            transform_id: None,
        };
        let outcome = encode(&input, &profile);
        assert!(outcome.is_ok(), "Hilbert encoding should work");
    }

    #[test]
    fn test_encode_full_vector_lossless() {
        let input = ramp(128);
        let elid = encode(&input, &Profile::lossless()).unwrap();
        assert!(is_reversible(&elid));

        let (restored, info) = decode_to_embedding(&elid).unwrap();
        assert_eq!(input, restored, "Lossless encoding should be exact");
        assert!(info.is_lossless());
    }

    #[test]
    fn test_encode_full_vector_half16() {
        let input = ramp(768);
        let profile = Profile::FullVector {
            precision: VectorPrecision::Half16,
            dimensions: DimensionMode::Preserve,
            seed: 0,
        };
        let elid = encode(&input, &profile).unwrap();
        assert!(is_reversible(&elid));

        let (restored, info) = decode_to_embedding(&elid).unwrap();
        assert_eq!(restored.len(), input.len());
        assert!(!info.is_lossless());

        let max_error = max_abs_error(&input, &restored);
        assert!(max_error < 0.01, "Half16 max error: {}", max_error);
    }

    #[test]
    fn test_encode_full_vector_quant8() {
        let input = ramp(256);
        let profile = Profile::FullVector {
            precision: VectorPrecision::Quant8,
            dimensions: DimensionMode::Preserve,
            seed: 0,
        };
        let elid = encode(&input, &profile).unwrap();
        let (restored, _) = decode_to_embedding(&elid).unwrap();

        let max_error = max_abs_error(&input, &restored);
        assert!(max_error < 0.02, "Quant8 max error: {}", max_error);
    }

    #[test]
    fn test_encode_full_vector_dimension_reduction() {
        let input = ramp(768);
        let profile = Profile::FullVector {
            precision: VectorPrecision::Full32,
            dimensions: DimensionMode::Reduce { target_dims: 256 },
            seed: 0x12345678,
        };
        let elid = encode(&input, &profile).unwrap();
        let (restored, info) = decode_to_embedding(&elid).unwrap();

        assert_eq!(restored.len(), 256);
        assert_eq!(info.original_dims, 768);
        assert_eq!(info.encoded_dims, 256);
        assert!(info.has_dimension_reduction());
    }

    #[test]
    fn test_encode_full_vector_cross_dimensional() {
        let small = ramp(256);
        let large = ramp(768);
        let profile = Profile::cross_dimensional(128);

        let elid_small = encode(&small, &profile).unwrap();
        let elid_large = encode(&large, &profile).unwrap();
        let (dec_small, meta_small) = decode_to_embedding(&elid_small).unwrap();
        let (dec_large, meta_large) = decode_to_embedding(&elid_large).unwrap();

        assert_eq!(dec_small.len(), 128);
        assert_eq!(dec_large.len(), 128);
        assert_eq!(meta_small.original_dims, 256);
        assert_eq!(meta_large.original_dims, 768);

        // Both decoded vectors have the same length, so their dot product
        // can be formed; only finiteness is checked.
        let similarity: f32 = dec_small
            .iter()
            .zip(dec_large.iter())
            .map(|(a, b)| a * b)
            .sum();
        assert!(similarity.is_finite());
    }

    #[test]
    fn test_is_reversible_mini128() {
        let input = vec![0.1; 128];
        let elid = encode(&input, &Profile::default()).unwrap();
        assert!(!is_reversible(&elid), "Mini128 should not be reversible");
    }

    #[test]
    fn test_is_reversible_full_vector() {
        let input = vec![0.1; 128];
        let elid = encode(&input, &Profile::lossless()).unwrap();
        assert!(is_reversible(&elid), "FullVector should be reversible");
    }

    #[test]
    fn test_decode_to_embedding_unsupported() {
        let input = vec![0.1; 128];
        let elid = encode(&input, &Profile::default()).unwrap();
        let outcome = decode_to_embedding(&elid);
        assert!(
            matches!(outcome, Err(ElidError::DecodingNotSupported)),
            "Mini128 should not support decode_to_embedding"
        );
    }

    #[test]
    fn test_profile_max_length_constraint() {
        let input = ramp(768);
        let max_chars = 100;
        let profile = Profile::max_length(max_chars, 768);
        let elid = encode(&input, &profile).unwrap();
        let actual_len = elid.as_str().len();
        assert!(
            actual_len <= max_chars,
            "ELID length {} exceeds max {}",
            actual_len,
            max_chars
        );
    }

    #[test]
    fn test_profile_compressed_retention() {
        let input = ramp(768);
        for retention in [1.0, 0.5, 0.25, 0.1] {
            let profile = Profile::compressed(retention, 768);
            let elid = encode(&input, &profile).unwrap();
            assert!(is_reversible(&elid));

            let full_size = Profile::lossless().string_length_for_dims(768);
            let compressed_size = elid.as_str().len();
            // Only retentions clearly below 1.0 are required to shrink the ELID.
            if retention < 0.9 {
                assert!(
                    compressed_size < full_size,
                    "Retention {} should reduce size (full: {}, compressed: {})",
                    retention,
                    full_size,
                    compressed_size
                );
            }
        }
    }
}