distx_similarity/
lib.rs

1//! # DistX Similarity
2//!
3//! A schema-driven similarity engine for tabular rows.
4//!
5//! This crate provides structured similarity queries on top of DistX,
6//! enabling similarity search over tabular data with explainable results.
7//!
8//! ## Features
9//!
10//! - **Similarity Schema**: Declarative schema defining which fields matter and their weights
11//! - **Structured Embedding**: Automatic vector generation from payloads
12//! - **Multi-field Reranking**: Accurate similarity scores combining multiple field types
13//! - **Explainability**: Per-field contribution breakdown for transparency
14//!
15//! ## Example
16//!
17//! ```rust
18//! use distx_similarity::{SimilaritySchema, FieldConfig, DistanceType, StructuredEmbedder, Reranker};
19//! use std::collections::HashMap;
20//! use serde_json::json;
21//!
22//! // Define a similarity schema
23//! let mut fields = HashMap::new();
24//! fields.insert("name".to_string(), FieldConfig::text(0.5));
25//! fields.insert("price".to_string(), FieldConfig::number(0.3, DistanceType::Relative));
26//! fields.insert("category".to_string(), FieldConfig::categorical(0.2));
27//!
28//! let mut schema = SimilaritySchema::new(fields);
29//! schema.validate_and_normalize().unwrap();
30//!
31//! // Create embedder and embed a payload
32//! let embedder = StructuredEmbedder::new(schema.clone());
33//! let payload = json!({
34//!     "name": "Prosciutto cotto",
35//!     "price": 1.99,
36//!     "category": "salumi"
37//! });
38//! let vector = embedder.embed(&payload);
39//!
40//! // Use reranker for accurate similarity scoring
41//! let reranker = Reranker::new(schema);
42//! // ... rerank approximate-nearest-neighbor (ANN) candidates with structured scoring
43//! ```
44//!
45//! ## Architecture
46//!
47//! ```text
48//! ┌─────────────┐     ┌─────────────┐     ┌─────────────┐
49//! │   Schema    │────>│  Embedder   │────>│   Vector    │
50//! │  (fields)   │     │ (payload→v) │     │   Store     │
51//! └─────────────┘     └─────────────┘     └─────────────┘
52//!       │                                        │
53//!       │              ┌─────────────┐           │
54//!       └─────────────>│  Reranker   │<──────────┘
55//!                      │ (candidates)│
56//!                      └─────────────┘
57//!                             │
58//!                      ┌─────────────┐
59//!                      │  Explain    │
60//!                      │  (results)  │
61//!                      └─────────────┘
62//! ```
63
/// Schema module. The crate root re-exports `SimilaritySchema`, `FieldConfig`,
/// `FieldType`, `DistanceType`, `EmbeddingType`, and `SchemaError` from here.
64pub mod schema;
/// Distance module. No item from it appears in the crate-root re-export list,
/// so its contents are reached through the `distance::` path.
65pub mod distance;
/// Embedder module. The crate root re-exports `StructuredEmbedder`,
/// `EmbedderBuilder`, `DEFAULT_TEXT_DIM`, and `DEFAULT_CATEGORICAL_DIM` from here.
66pub mod embedder;
/// Rerank module. The crate root re-exports `Reranker` and `RankedResult` from here.
67pub mod rerank;
/// Explain module. The crate root re-exports `ExplainedResult`, `SimilarResponse`,
/// `SimilarityStats`, and `PointIdSer` from here.
68pub mod explain;
69
70// Re-export main types for convenience
// With these re-exports, callers can import the items below directly from the
// crate root (e.g. `use distx_similarity::SimilaritySchema;`) instead of
// spelling out the defining module's path.
71pub use schema::{
72    SimilaritySchema, 
73    FieldConfig, 
74    FieldType, 
75    DistanceType, 
76    EmbeddingType,
77    SchemaError,
78};
79pub use embedder::{StructuredEmbedder, EmbedderBuilder, DEFAULT_TEXT_DIM, DEFAULT_CATEGORICAL_DIM};
80pub use rerank::{Reranker, RankedResult};
81pub use explain::{ExplainedResult, SimilarResponse, SimilarityStats, PointIdSer};