anno_core/core/provisional.rs
1//! Provisional types for experimental features.
2//!
3//! Types in this module are not yet stable and may change or be removed.
4//! They exist to enable experimentation without polluting the core type system.
5//!
6//! # Currently Provisional
7//!
8//! - [`BoxEmbedding`]: Geometric box embeddings for coreference (research stage)
9//! - [`ProvisionalIdentity`]: Identity wrapper with experimental fields
10//!
11//! # Why Provisional?
12//!
13//! Some features are valuable for research but not ready for the stable API:
14//!
15//! - **Representation may change**: Box embedding format is still being refined
16//! - **Performance unproven**: Haven't benchmarked at scale
17//! - **API surface unclear**: Don't know the right abstractions yet
18//!
19//! # Migration Path
20//!
21//! When a provisional type stabilizes:
22//! 1. Move it to the appropriate module (`grounded`, `entity`, etc.)
23//! 2. Add deprecation warning to the re-export here
24//! 3. Remove after one major version
25
26use serde::{Deserialize, Serialize};
27
/// Box embedding for geometric coreference resolution.
///
/// Uses axis-aligned hyperrectangles to encode logical invariants.
/// This is based on research showing that box embeddings can capture
/// containment relationships better than vector embeddings.
///
/// A box is described by two corner vectors, `min` and `max`, with one entry
/// per dimension. The struct itself does not enforce that both corners have
/// the same length or that `min <= max`; use [`BoxEmbedding::is_valid`] to
/// check a value that came from untrusted data.
///
/// # Status: Experimental
///
/// This type may change significantly. The corners are currently stored as
/// plain `Vec<f32>` values; [`BoxEmbedding::from_json`] and
/// [`BoxEmbedding::to_json`] convert to and from `serde_json::Value` for
/// backwards compatibility with existing data. The layout may still change
/// once the embedding format stabilizes.
///
/// # References
///
/// - Vilnis et al., "Probabilistic Embedding of Knowledge Graphs with Box Lattice Measures"
/// - Dasgupta et al., "Improving Local Identifiability in Probabilistic Box Embeddings"
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct BoxEmbedding {
    /// Minimum corner of the box (lower bounds), one entry per dimension.
    pub min: Vec<f32>,
    /// Maximum corner of the box (upper bounds), one entry per dimension.
    pub max: Vec<f32>,
    /// Temperature parameter for softbox formulation.
    ///
    /// Omitted from serialized output when `None`, and read back as `None`
    /// when the field is absent from the input.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
}
54
55impl BoxEmbedding {
56 /// Create a new box embedding.
57 #[must_use]
58 pub fn new(min: Vec<f32>, max: Vec<f32>) -> Self {
59 Self {
60 min,
61 max,
62 temperature: None,
63 }
64 }
65
66 /// Get the dimensionality of the box.
67 #[must_use]
68 pub fn dim(&self) -> usize {
69 self.min.len()
70 }
71
72 /// Check if this box is valid (min <= max in all dimensions).
73 #[must_use]
74 pub fn is_valid(&self) -> bool {
75 self.min.len() == self.max.len() && self.min.iter().zip(&self.max).all(|(lo, hi)| lo <= hi)
76 }
77
78 /// Compute the volume of the box (product of side lengths).
79 #[must_use]
80 pub fn volume(&self) -> f32 {
81 self.min
82 .iter()
83 .zip(&self.max)
84 .map(|(lo, hi)| (hi - lo).max(0.0))
85 .product()
86 }
87
88 /// Check if this box contains a point.
89 #[must_use]
90 pub fn contains_point(&self, point: &[f32]) -> bool {
91 point.len() == self.min.len()
92 && point
93 .iter()
94 .zip(&self.min)
95 .zip(&self.max)
96 .all(|((p, lo), hi)| p >= lo && p <= hi)
97 }
98
99 /// Check if this box contains another box.
100 #[must_use]
101 pub fn contains_box(&self, other: &BoxEmbedding) -> bool {
102 self.min.len() == other.min.len()
103 && self.min.iter().zip(&other.min).all(|(s, o)| s <= o)
104 && self.max.iter().zip(&other.max).all(|(s, o)| s >= o)
105 }
106
107 /// Compute intersection volume with another box.
108 #[must_use]
109 pub fn intersection_volume(&self, other: &BoxEmbedding) -> f32 {
110 if self.min.len() != other.min.len() {
111 return 0.0;
112 }
113
114 self.min
115 .iter()
116 .zip(&self.max)
117 .zip(other.min.iter().zip(&other.max))
118 .map(|((lo1, hi1), (lo2, hi2))| {
119 let lo = lo1.max(*lo2);
120 let hi = hi1.min(*hi2);
121 (hi - lo).max(0.0)
122 })
123 .product()
124 }
125
126 /// Convert from a JSON value (for backwards compatibility with existing data).
127 pub fn from_json(value: &serde_json::Value) -> Option<Self> {
128 serde_json::from_value(value.clone()).ok()
129 }
130
131 /// Convert to a JSON value (for backwards compatibility).
132 #[must_use]
133 pub fn to_json(&self) -> serde_json::Value {
134 serde_json::to_value(self).unwrap_or(serde_json::Value::Null)
135 }
136}
137
/// Identity extension with provisional fields.
///
/// Wraps a core `Identity` together with optional experimental data, so that
/// box embeddings or other experimental features can be attached
/// without modifying the core `Identity` type.
///
/// # Example
///
/// ```rust,ignore
/// use anno_core::grounded::Identity;
/// use anno_core::provisional::{ProvisionalIdentity, BoxEmbedding};
///
/// let identity = Identity::new(0, "Marie Curie");
/// let provisional = ProvisionalIdentity::from_identity(identity)
///     .with_box_embedding(BoxEmbedding::new(vec![0.0; 64], vec![1.0; 64]));
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ProvisionalIdentity {
    /// The base identity.
    pub base: super::grounded::Identity,
    /// Optional box embedding for geometric coreference.
    ///
    /// Omitted from serialized output when `None`, and read back as `None`
    /// when the field is absent from the input.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub box_embedding: Option<BoxEmbedding>,
}
161
162impl ProvisionalIdentity {
163 /// Create a provisional identity from a base identity.
164 #[must_use]
165 pub fn from_identity(base: super::grounded::Identity) -> Self {
166 Self {
167 base,
168 box_embedding: None,
169 }
170 }
171
172 /// Add a box embedding.
173 #[must_use]
174 pub fn with_box_embedding(mut self, embedding: BoxEmbedding) -> Self {
175 self.box_embedding = Some(embedding);
176 self
177 }
178}
179
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance for comparing computed volumes against their expected value.
    const EPS: f32 = 1e-6;

    /// Build a 2-D box spanning `[lo, hi]` along both axes.
    fn square(lo: f32, hi: f32) -> BoxEmbedding {
        BoxEmbedding::new(vec![lo, lo], vec![hi, hi])
    }

    /// A unit square is valid, two-dimensional, and has volume 1.
    #[test]
    fn test_box_embedding_basic() {
        let unit = square(0.0, 1.0);

        assert!(unit.is_valid());
        assert_eq!(unit.dim(), 2);

        let vol = unit.volume();
        assert!((vol - 1.0).abs() < EPS);
    }

    /// Containment holds from the larger box to the smaller one only.
    #[test]
    fn test_box_embedding_containment() {
        let big = square(0.0, 2.0);
        let small = square(0.5, 1.5);

        assert!(big.contains_box(&small));
        assert!(!small.contains_box(&big));
    }

    /// Two 2x2 squares offset by one unit share a 1x1 overlap.
    #[test]
    fn test_box_embedding_intersection() {
        let left = square(0.0, 2.0);
        let right = square(1.0, 3.0);

        let overlap = left.intersection_volume(&right);
        assert!((overlap - 1.0).abs() < EPS);
    }

    /// Serializing to a JSON string and parsing it back yields an equal value.
    #[test]
    fn test_box_embedding_serde() {
        let before = BoxEmbedding::new(vec![0.0, 1.0, 2.0], vec![1.0, 2.0, 3.0]);

        let encoded = serde_json::to_string(&before).unwrap();
        let after: BoxEmbedding = serde_json::from_str(&encoded).unwrap();

        assert_eq!(before, after);
    }
}