distx_schema/
lib.rs

1//! # DistX Schema
2//!
3//! **DistX does not store vectors that represent objects.
4//! It stores objects, and derives vectors from their structure.**
5//!
6//! This crate provides the Similarity Contract engine for DistX — a schema-driven
7//! approach to structured similarity search over tabular data.
8//!
9//! ## The Similarity Contract
10//!
11//! The schema is not just configuration — it's a **contract** that governs:
12//!
13//! - **Ingest**: How objects are converted to vectors (deterministic, reproducible)
14//! - **Query**: How similarity is computed across multiple field types
15//! - **Ranking**: How results are scored and ordered
16//! - **Explainability**: How each field contributes to the final score
17//!
18//! ## What This Is NOT
19//!
20//! DistX Schema is **not**:
21//! - A neural embedding model (it uses no ML, needs no training, and has no drift)
22//! - A semantic LLM system (it is deterministic, not probabilistic)
23//! - A black-box recommender (its scores are fully explainable)
24//!
25//! It **is**:
26//! - A contract-based similarity engine for structured data
27//! - Deterministic and reproducible (same input → same output, always)
28//! - Designed for ERP, e-commerce, CRM, and tabular datasets
29//!
30//! ## Supported Field Types
31//!
32//! | Type | Distance Methods | Use Case |
33//! |------|-----------------|----------|
34//! | `text` | trigram hashing | Product names, descriptions |
35//! | `number` | relative, absolute | Prices, quantities, scores |
36//! | `categorical` | exact match hashing | Categories, brands, status |
37//! | `boolean` | equality | Flags, availability |
38//!
39//! ## Example
40//!
41//! ```rust
42//! use distx_schema::{SimilaritySchema, FieldConfig, DistanceType, StructuredEmbedder, Reranker};
43//! use std::collections::HashMap;
44//! use serde_json::json;
45//!
46//! // Define a Similarity Contract
47//! let mut fields = HashMap::new();
48//! fields.insert("name".to_string(), FieldConfig::text(0.5));
49//! fields.insert("price".to_string(), FieldConfig::number(0.3, DistanceType::Relative));
50//! fields.insert("category".to_string(), FieldConfig::categorical(0.2));
51//!
52//! let mut schema = SimilaritySchema::new(fields);
53//! schema.validate_and_normalize().unwrap();
54//!
55//! // Derive vector from object structure
56//! let embedder = StructuredEmbedder::new(schema.clone());
57//! let payload = json!({
58//!     "name": "Prosciutto cotto",
59//!     "price": 1.99,
60//!     "category": "salumi"
61//! });
62//! let vector = embedder.embed(&payload);
63//!
64//! // Rerank with explainable scoring
65//! let reranker = Reranker::new(schema);
66//! // ... rerank approximate-nearest-neighbor (ANN) candidates with per-field scoring
67//! ```
68//!
69//! ## Architecture
70//!
71//! ```text
72//! ┌─────────────────────────────────────────────────────────────────┐
73//! │                    SIMILARITY CONTRACT                          │
74//! │  ┌─────────────┐    ┌───────────────┐    ┌─────────────┐       │
75//! │  │   Schema    │───>│   Embedder    │───>│   Vector    │       │
76//! │  │ (contract)  │    │(deterministic)│    │   Store     │       │
77//! │  └─────────────┘    └───────────────┘    └─────────────┘       │
78//! │        │                                        │               │
79//! │        │              ┌─────────────┐           │               │
80//! │        └─────────────>│  Reranker   │<──────────┘               │
81//! │                       │(structured) │                           │
82//! │                       └─────────────┘                           │
83//! │                              │                                  │
84//! │                       ┌─────────────┐                           │
85//! │                       │  Explain    │                           │
86//! │                       │(per-field)  │                           │
87//! │                       └─────────────┘                           │
88//! └─────────────────────────────────────────────────────────────────┘
89//! ```
90
91pub mod schema;
92pub mod distance;
93pub mod embedder;
94pub mod rerank;
95pub mod explain;
96
97// Re-export the main public types at the crate root so callers can import them directly from `distx_schema`
98pub use schema::{
99    SimilaritySchema, 
100    FieldConfig, 
101    FieldType, 
102    DistanceType, 
103    EmbeddingType,
104    SchemaError,
105};
106pub use embedder::{StructuredEmbedder, EmbedderBuilder, DEFAULT_TEXT_DIM, DEFAULT_CATEGORICAL_DIM};
107pub use rerank::{Reranker, RankedResult};
108pub use explain::{ExplainedResult, SimilarResponse, SimilarityStats, PointIdSer};