distx_similarity/lib.rs
1//! # DistX Similarity
2//!
3//! A schema-driven similarity engine for tabular rows.
4//!
5//! This crate provides structured similarity queries on top of DistX,
6//! enabling similarity search over tabular data with explainable results.
7//!
8//! ## Features
9//!
10//! - **Similarity Schema**: Declarative schema defining which fields matter and their weights
11//! - **Structured Embedding**: Automatic vector generation from payloads
12//! - **Multi-field Reranking**: Accurate similarity scores combining multiple field types
13//! - **Explainability**: Per-field contribution breakdown for transparency
14//!
15//! ## Example
16//!
17//! ```rust
18//! use distx_similarity::{SimilaritySchema, FieldConfig, DistanceType, StructuredEmbedder, Reranker};
19//! use std::collections::HashMap;
20//! use serde_json::json;
21//!
22//! // Define a similarity schema
23//! let mut fields = HashMap::new();
24//! fields.insert("name".to_string(), FieldConfig::text(0.5));
25//! fields.insert("price".to_string(), FieldConfig::number(0.3, DistanceType::Relative));
26//! fields.insert("category".to_string(), FieldConfig::categorical(0.2));
27//!
28//! let mut schema = SimilaritySchema::new(fields);
29//! schema.validate_and_normalize().unwrap();
30//!
31//! // Create embedder and embed a payload
32//! let embedder = StructuredEmbedder::new(schema.clone());
33//! let payload = json!({
34//! "name": "Prosciutto cotto",
35//! "price": 1.99,
36//! "category": "salumi"
37//! });
38//! let vector = embedder.embed(&payload);
39//!
40//! // Use reranker for accurate similarity scoring
41//! let reranker = Reranker::new(schema);
42//! // ... rerank approximate-nearest-neighbor (ANN) candidates with structured scoring
43//! ```
44//!
45//! ## Architecture
46//!
47//! ```text
48//! ┌─────────────┐     ┌─────────────┐     ┌─────────────┐
49//! │   Schema    │────>│  Embedder   │────>│   Vector    │
50//! │  (fields)   │     │ (payload→v) │     │    Store    │
51//! └─────────────┘     └─────────────┘     └─────────────┘
52//!      │                                        │
53//!      │              ┌─────────────┐           │
54//!      └─────────────>│  Reranker   │<──────────┘
55//!                     │ (candidates)│
56//!                     └─────────────┘
57//!                            │
58//!                     ┌─────────────┐
59//!                     │   Explain   │
60//!                     │  (results)  │
61//!                     └─────────────┘
62//! ```
63
// Public submodules of the crate. The notes below record which items the
// `pub use` lines in this file re-export from each module at the crate root.

// `schema`: origin of the re-exported `SimilaritySchema`, `FieldConfig`,
// `FieldType`, `DistanceType`, `EmbeddingType` and `SchemaError`.
64pub mod schema;
// `distance`: nothing from this module is re-exported in the visible part of
// this file; its items are reachable only through the `distance::` path.
65pub mod distance;
// `embedder`: origin of the re-exported `StructuredEmbedder`, `EmbedderBuilder`,
// `DEFAULT_TEXT_DIM` and `DEFAULT_CATEGORICAL_DIM`.
66pub mod embedder;
// `rerank`: origin of the re-exported `Reranker` and `RankedResult`.
67pub mod rerank;
// `explain`: origin of the re-exported `ExplainedResult`, `SimilarResponse`,
// `SimilarityStats` and `PointIdSer`.
68pub mod explain;
69
70// Re-export main types for convenience
71pub use schema::{
72 SimilaritySchema,
73 FieldConfig,
74 FieldType,
75 DistanceType,
76 EmbeddingType,
77 SchemaError,
78};
79pub use embedder::{StructuredEmbedder, EmbedderBuilder, DEFAULT_TEXT_DIM, DEFAULT_CATEGORICAL_DIM};
80pub use rerank::{Reranker, RankedResult};
81pub use explain::{ExplainedResult, SimilarResponse, SimilarityStats, PointIdSer};