use triblespace_core::inline::Encodes;
use std::convert::Infallible;
use anybytes::View;
use triblespace_core::blob::{Blob, BlobEncoding, TryFromBlob};
use triblespace_core::id::ExclusiveId;
use triblespace_core::id_hex;
use triblespace_core::macros::entity;
use triblespace_core::metadata::{self, MetaDescribe};
use triblespace_core::trible::Fragment;
use triblespace_core::inline::{IntoInline, TryFromInline, Inline, InlineEncoding};
/// Type-level marker for the `F32LE` inline encoding: an `f32` written little-endian into
/// the first 4 bytes of a 32-byte inline value, with the remaining bytes zero.
///
/// The enum has no variants, so it can never be instantiated; it is only used as a type
/// parameter (e.g. `Inline<F32LE>`).
pub enum F32LE {}
impl MetaDescribe for F32LE {
    /// Builds the metadata fragment describing the `F32LE` encoding.
    ///
    /// The fragment carries the encoding's name, a human-readable description and the
    /// `KIND_INLINE_ENCODING` tag, all attached to a fixed, hard-coded entity id.
    fn describe() -> Fragment {
        // Hard-coded identifier of the entity that represents this encoding.
        let encoding_id = id_hex!("816B4751EA8C12644CCB572F36188EBA");
        entity! { ExclusiveId::force_ref(&encoding_id) @
            metadata::name: "F32LE",
            metadata::description: "32-bit IEEE-754 float stored little-endian in the first 4 bytes of the 32-byte Inline, with the rest zero-padded.",
            metadata::tag: metadata::KIND_INLINE_ENCODING,
        }
    }
}
/// Registers [`F32LE`] as an inline encoding.
impl InlineEncoding for F32LE {
/// The validation error type is uninhabited (`Infallible`).
type ValidationError = Infallible;
// NOTE(review): `Encoding = Self` presumably names the canonical encoding this marker
// resolves to — confirm against the `InlineEncoding` trait documentation.
type Encoding = Self;
}
impl Encodes<f32> for F32LE {
    type Output = Inline<F32LE>;

    /// Encodes `source` as an `F32LE` inline value.
    ///
    /// The four little-endian bytes of the float occupy the start of the 32-byte buffer;
    /// every remaining byte stays zero.
    fn encode(source: f32) -> Inline<F32LE> {
        let le = source.to_le_bytes();
        let mut raw = [0u8; 32];
        raw[..le.len()].copy_from_slice(&le);
        Inline::new(raw)
    }
}
impl Encodes<&f32> for F32LE {
    type Output = Inline<F32LE>;

    /// Encodes a borrowed `f32` by copying it out of the reference and converting the
    /// owned value with `to_inline`.
    fn encode(source: &f32) -> Inline<F32LE> {
        let value: f32 = *source;
        value.to_inline()
    }
}
impl TryFromInline<'_, F32LE> for f32 {
    type Error = Infallible;

    /// Decodes the `f32` stored in the first four bytes of `value`.
    ///
    /// Only bytes `0..4` are read, interpreted as little-endian; the padding bytes are
    /// ignored. This function always returns `Ok`.
    fn try_from_inline(value: &Inline<F32LE>) -> Result<Self, Self::Error> {
        let mut le = [0u8; 4];
        le.copy_from_slice(&value.raw[0..4]);
        Ok(f32::from_le_bytes(le))
    }
}
/// Marker type for the `Embedding` blob encoding: an arbitrary-length sequence of `f32`
/// values stored as a blob (4 bytes per component, little-endian per the encoding's own
/// description).
pub struct Embedding {}
/// Marks [`Embedding`] as a blob encoding. The impl body is empty, so no trait items are
/// overridden here.
impl BlobEncoding for Embedding {}
impl MetaDescribe for Embedding {
    /// Builds the metadata fragment describing the `Embedding` blob encoding.
    ///
    /// The fragment carries the encoding's name, a human-readable description and the
    /// `KIND_BLOB_ENCODING` tag, all attached to a fixed, hard-coded entity id.
    fn describe() -> Fragment {
        // Hard-coded identifier of the entity that represents this encoding.
        let encoding_id = id_hex!("EEC5DFDEA2FFCED70850DF83B03CB62B");
        entity! { ExclusiveId::force_ref(&encoding_id) @
            metadata::name: "Embedding",
            metadata::description: "Arbitrary-length [f32] (little-endian) stored as a blob. Used as the L2-normalized vector representation of an entity in HNSW indexes; length = dim × 4, dim isn't recorded in the blob header — the index that owns the handle carries it.",
            metadata::tag: metadata::KIND_BLOB_ENCODING,
        }
    }
}
/// Shorthand for the hash-based handle encoding that refers to an [`Embedding`] blob.
pub type EmbHandle = triblespace_core::inline::encodings::hash::Handle<Embedding>;
impl TryFromBlob<Embedding> for View<[f32]> {
type Error = anybytes::view::ViewError;
fn try_from_blob(b: Blob<Embedding>) -> Result<Self, Self::Error> {
b.bytes.view()
}
}
impl Encodes<View<[f32]>> for Embedding
where
    triblespace_core::inline::encodings::hash::Handle<Embedding>:
        triblespace_core::inline::InlineEncoding,
{
    type Output = Blob<Embedding>;

    /// Wraps the bytes backing an existing `[f32]` view in an [`Embedding`] blob.
    ///
    /// No per-component conversion happens here: the blob is built directly from
    /// `source.bytes()`.
    // NOTE(review): the bytes are presumably in host memory order, which matches the
    // little-endian contract only on little-endian targets — confirm whether big-endian
    // targets need to be supported.
    fn encode(source: View<[f32]>) -> Blob<Embedding> {
        let backing = source.bytes();
        Blob::new(backing)
    }
}
impl Encodes<Vec<f32>> for Embedding
where
    triblespace_core::inline::encodings::hash::Handle<Embedding>:
        triblespace_core::inline::InlineEncoding,
{
    type Output = Blob<Embedding>;

    /// Serializes an owned vector into an [`Embedding`] blob.
    ///
    /// Each component is written as its 4 little-endian bytes, in order, so the blob is
    /// exactly `source.len() * 4` bytes long.
    fn encode(source: Vec<f32>) -> Blob<Embedding> {
        // Reserve the exact output size up front so the buffer never reallocates.
        let mut le_bytes: Vec<u8> = Vec::with_capacity(source.len() * 4);
        le_bytes.extend(source.iter().flat_map(|component| component.to_le_bytes()));
        Blob::new(le_bytes.into())
    }
}
impl Encodes<&[f32]> for Embedding
where
    triblespace_core::inline::encodings::hash::Handle<Embedding>:
        triblespace_core::inline::InlineEncoding,
{
    type Output = Blob<Embedding>;

    /// Serializes a borrowed slice into an [`Embedding`] blob.
    ///
    /// Each component is written as its 4 little-endian bytes, in order, so the blob is
    /// exactly `source.len() * 4` bytes long.
    fn encode(source: &[f32]) -> Blob<Embedding> {
        // Reserve the exact output size up front so the buffer never reallocates.
        let reserved: Vec<u8> = Vec::with_capacity(source.len() * 4);
        let le_bytes = source.iter().fold(reserved, |mut acc, component| {
            acc.extend_from_slice(&component.to_le_bytes());
            acc
        });
        Blob::new(le_bytes.into())
    }
}
/// Scales `vec` in place so that its Euclidean (L2) norm becomes 1.
///
/// Behaviour by input:
///
/// * Ordinary vectors are multiplied by `1 / sqrt(sum of squares)`.
/// * Vectors whose squared components overflow `f32` (very large magnitudes) or underflow
///   it (very small magnitudes) are first rescaled by their largest absolute component,
///   so they are still normalized correctly instead of being zeroed or left unscaled.
/// * Empty and all-zero vectors are left unchanged, since they have no direction.
/// * Vectors containing a NaN or infinite component are left unchanged.
pub fn l2_normalize(vec: &mut [f32]) {
    let sum_sq: f32 = vec.iter().map(|&x| x * x).sum();

    if sum_sq.is_normal() {
        // Common case. This is the plain formula, so results for ordinary inputs are
        // bit-for-bit stable (which matters when the bytes end up content-addressed).
        let inv = 1.0 / sum_sq.sqrt();
        for v in vec.iter_mut() {
            *v *= inv;
        }
        return;
    }

    // From here on `sum_sq` is zero, subnormal, infinite or NaN, so it cannot be trusted.

    // A NaN or infinite component has no meaningful unit-length counterpart.
    if vec.iter().any(|x| !x.is_finite()) {
        return;
    }

    let max_abs = vec.iter().fold(0.0_f32, |acc, &x| acc.max(x.abs()));
    if max_abs == 0.0 {
        // Empty or all-zero input.
        return;
    }

    // After dividing by `max_abs` every component lies in [-1, 1] and at least one has
    // magnitude 1, so the sum of squares can neither overflow nor vanish.
    let scaled_norm = vec
        .iter()
        .map(|&x| {
            let scaled = x / max_abs;
            scaled * scaled
        })
        .sum::<f32>()
        .sqrt();
    for v in vec.iter_mut() {
        *v = *v / max_abs / scaled_norm;
    }
}
/// L2-normalizes `vec` and stores it in `store` as an [`Embedding`] blob.
///
/// The vector is normalized in place with [`l2_normalize`] before it is handed to
/// `store.put`; an all-zero vector is stored as-is.
///
/// # Errors
///
/// Returns the store's `PutError` if `store.put` fails.
pub fn put_embedding<B>(
    store: &mut B,
    mut vec: Vec<f32>,
) -> Result<triblespace_core::inline::Inline<triblespace_core::inline::encodings::hash::Handle<Embedding>>, B::PutError>
where
    B: triblespace_core::repo::BlobStorePut,
    triblespace_core::inline::encodings::hash::Handle<Embedding>:
        triblespace_core::inline::InlineEncoding,
{
    l2_normalize(vec.as_mut_slice());
    store.put::<Embedding, _>(vec)
}
#[cfg(test)]
mod tests {
    //! Unit tests for the `F32LE` inline encoding, the `Embedding` blob encoding and the
    //! normalization helpers.

    use super::*;
    use triblespace_core::blob::IntoBlob;

    /// A positive value survives an encode/decode round trip.
    #[test]
    fn round_trip_positive() {
        let original: f32 = 0.123;
        let v: Inline<F32LE> = original.to_inline();
        let back: f32 = f32::try_from_inline(&v).unwrap();
        assert_eq!(original, back);
    }

    /// A negative value survives an encode/decode round trip.
    #[test]
    fn round_trip_negative() {
        let original: f32 = -42.75;
        let v: Inline<F32LE> = original.to_inline();
        let back: f32 = f32::try_from_inline(&v).unwrap();
        assert_eq!(original, back);
    }

    /// Zero round-trips with an identical bit pattern.
    #[test]
    fn round_trip_zero() {
        let original: f32 = 0.0;
        let v: Inline<F32LE> = original.to_inline();
        let back: f32 = f32::try_from_inline(&v).unwrap();
        assert_eq!(original.to_bits(), back.to_bits());
    }

    /// NaN decodes back to a NaN (compared with `is_nan`, since NaN != NaN).
    #[test]
    fn round_trip_nan() {
        let original: f32 = f32::NAN;
        let v: Inline<F32LE> = original.to_inline();
        let back: f32 = f32::try_from_inline(&v).unwrap();
        assert!(back.is_nan());
    }

    /// Bytes 4..32 of the inline value are zero.
    #[test]
    fn padding_is_zero() {
        let v: Inline<F32LE> = 2.5f32.to_inline();
        assert_eq!(&v.raw[4..32], &[0u8; 28]);
    }

    /// Encoding the same float twice yields identical raw bytes.
    #[test]
    fn deterministic_same_input_same_value() {
        let a: Inline<F32LE> = 1.5f32.to_inline();
        let b: Inline<F32LE> = 1.5f32.to_inline();
        assert_eq!(a.raw, b.raw);
    }

    /// A vector encoded as a blob can be read back as a `[f32]` view.
    #[test]
    fn embedding_blob_round_trip() {
        let original: Vec<f32> = vec![0.1, -0.5, 3.25, std::f32::consts::PI];
        let blob: Blob<Embedding> = original.clone().to_blob();
        let view: View<[f32]> = TryFromBlob::try_from_blob(blob).unwrap();
        assert_eq!(view.as_ref(), original.as_slice());
    }

    /// `put_embedding` stores a vector that can be fetched back through a reader.
    #[test]
    fn put_embedding_roundtrips_through_memory_store() {
        use triblespace_core::blob::MemoryBlobStore;
        use triblespace_core::repo::{BlobStore, BlobStoreGet};
        let mut store = MemoryBlobStore::new();
        let vec = vec![1.0_f32, 0.0, 0.0];
        let handle = put_embedding::<_>(&mut store, vec.clone()).unwrap();
        let reader = store.reader().unwrap();
        let view: View<[f32]> = reader.get::<View<[f32]>, Embedding>(handle).unwrap();
        assert_eq!(view.as_ref(), &[1.0_f32, 0.0, 0.0]);
    }

    /// Equal vectors share a handle; different vectors do not.
    #[test]
    fn embedding_handle_is_content_addressed() {
        use triblespace_core::inline::encodings::hash::Handle;
        let v1: Vec<f32> = vec![1.0, 2.0, 3.0];
        let v2: Vec<f32> = vec![1.0, 2.0, 3.0];
        let v3: Vec<f32> = vec![1.0, 2.0, 3.0, 4.0];
        let h1: Inline<Handle<Embedding>> = v1.to_blob().get_handle();
        let h2: Inline<Handle<Embedding>> = v2.to_blob().get_handle();
        let h3: Inline<Handle<Embedding>> = v3.to_blob().get_handle();
        assert_eq!(h1, h2, "identical vectors must dedup by handle");
        assert_ne!(h1, h3, "different vectors must have different handles");
    }

    /// `l2_normalize` scales an ordinary vector to unit length.
    #[test]
    fn l2_normalize_scales_to_unit_length() {
        let mut v = vec![3.0_f32, 4.0];
        l2_normalize(&mut v);
        assert!((v[0] - 0.6).abs() < 1e-6);
        assert!((v[1] - 0.8).abs() < 1e-6);
    }

    /// `l2_normalize` leaves an all-zero vector untouched instead of dividing by zero.
    #[test]
    fn l2_normalize_leaves_zero_vector_untouched() {
        let mut v = vec![0.0_f32; 3];
        l2_normalize(&mut v);
        assert_eq!(v, vec![0.0_f32; 3]);
    }
}