Skip to main content

anno_core/core/
provisional.rs

1//! Provisional types for experimental features.
2//!
3//! Types in this module are not yet stable and may change or be removed.
4//! They exist to enable experimentation without polluting the core type system.
5//!
6//! # Currently Provisional
7//!
8//! - [`BoxEmbedding`]: Geometric box embeddings for coreference (research stage)
9//! - [`ProvisionalIdentity`]: Identity wrapper with experimental fields
10//!
11//! # Why Provisional?
12//!
13//! Some features are valuable for research but not ready for the stable API:
14//!
15//! - **Representation may change**: Box embedding format is still being refined
16//! - **Performance unproven**: Haven't benchmarked at scale
17//! - **API surface unclear**: Don't know the right abstractions yet
18//!
19//! # Migration Path
20//!
21//! When a provisional type stabilizes:
22//! 1. Move it to the appropriate module (`grounded`, `entity`, etc.)
23//! 2. Add a deprecation warning to the re-export left in this module
24//! 3. Remove that re-export after one major version
25
26use serde::{Deserialize, Serialize};
27
28/// Box embedding for geometric coreference resolution.
29///
30/// Uses axis-aligned hyperrectangles to encode logical invariants.
31/// This is based on research showing that box embeddings can capture
32/// containment relationships better than vector embeddings.
33///
34/// # Status: Experimental
35///
36/// This type may change significantly. The current representation uses
37/// `serde_json::Value` as a placeholder; a proper typed representation
38/// will be added once the embedding format stabilizes.
39///
40/// # References
41///
42/// - Vilnis et al., "Probabilistic Embedding of Knowledge Graphs with Box Lattice Measures"
43/// - Dasgupta et al., "Improving Local Identifiability in Probabilistic Box Embeddings"
44#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
45pub struct BoxEmbedding {
46    /// Minimum corner of the box (lower bounds).
47    pub min: Vec<f32>,
48    /// Maximum corner of the box (upper bounds).
49    pub max: Vec<f32>,
50    /// Temperature parameter for softbox formulation.
51    #[serde(default, skip_serializing_if = "Option::is_none")]
52    pub temperature: Option<f32>,
53}
54
55impl BoxEmbedding {
56    /// Create a new box embedding.
57    #[must_use]
58    pub fn new(min: Vec<f32>, max: Vec<f32>) -> Self {
59        Self {
60            min,
61            max,
62            temperature: None,
63        }
64    }
65
66    /// Get the dimensionality of the box.
67    #[must_use]
68    pub fn dim(&self) -> usize {
69        self.min.len()
70    }
71
72    /// Check if this box is valid (min <= max in all dimensions).
73    #[must_use]
74    pub fn is_valid(&self) -> bool {
75        self.min.len() == self.max.len() && self.min.iter().zip(&self.max).all(|(lo, hi)| lo <= hi)
76    }
77
78    /// Compute the volume of the box (product of side lengths).
79    #[must_use]
80    pub fn volume(&self) -> f32 {
81        self.min
82            .iter()
83            .zip(&self.max)
84            .map(|(lo, hi)| (hi - lo).max(0.0))
85            .product()
86    }
87
88    /// Check if this box contains a point.
89    #[must_use]
90    pub fn contains_point(&self, point: &[f32]) -> bool {
91        point.len() == self.min.len()
92            && point
93                .iter()
94                .zip(&self.min)
95                .zip(&self.max)
96                .all(|((p, lo), hi)| p >= lo && p <= hi)
97    }
98
99    /// Check if this box contains another box.
100    #[must_use]
101    pub fn contains_box(&self, other: &BoxEmbedding) -> bool {
102        self.min.len() == other.min.len()
103            && self.min.iter().zip(&other.min).all(|(s, o)| s <= o)
104            && self.max.iter().zip(&other.max).all(|(s, o)| s >= o)
105    }
106
107    /// Compute intersection volume with another box.
108    #[must_use]
109    pub fn intersection_volume(&self, other: &BoxEmbedding) -> f32 {
110        if self.min.len() != other.min.len() {
111            return 0.0;
112        }
113
114        self.min
115            .iter()
116            .zip(&self.max)
117            .zip(other.min.iter().zip(&other.max))
118            .map(|((lo1, hi1), (lo2, hi2))| {
119                let lo = lo1.max(*lo2);
120                let hi = hi1.min(*hi2);
121                (hi - lo).max(0.0)
122            })
123            .product()
124    }
125
126    /// Convert from a JSON value (for backwards compatibility with existing data).
127    pub fn from_json(value: &serde_json::Value) -> Option<Self> {
128        serde_json::from_value(value.clone()).ok()
129    }
130
131    /// Convert to a JSON value (for backwards compatibility).
132    #[must_use]
133    pub fn to_json(&self) -> serde_json::Value {
134        serde_json::to_value(self).unwrap_or(serde_json::Value::Null)
135    }
136}
137
138/// Identity extension with provisional fields.
139///
140/// Use this when you need box embeddings or other experimental features
141/// without modifying the core `Identity` type.
142///
143/// # Example
144///
145/// ```rust
146/// use anno_core::Identity;
147/// use anno_core::core::provisional::{ProvisionalIdentity, BoxEmbedding};
148///
149/// let identity = Identity::new(0, "Marie Curie");
150/// let provisional = ProvisionalIdentity::from_identity(identity)
151///     .with_box_embedding(BoxEmbedding::new(vec![0.0; 64], vec![1.0; 64]));
152/// ```
153#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
154pub struct ProvisionalIdentity {
155    /// The base identity.
156    pub base: super::grounded::Identity,
157    /// Optional box embedding for geometric coreference.
158    #[serde(default, skip_serializing_if = "Option::is_none")]
159    pub box_embedding: Option<BoxEmbedding>,
160}
161
162impl ProvisionalIdentity {
163    /// Create a provisional identity from a base identity.
164    #[must_use]
165    pub fn from_identity(base: super::grounded::Identity) -> Self {
166        Self {
167            base,
168            box_embedding: None,
169        }
170    }
171
172    /// Add a box embedding.
173    #[must_use]
174    pub fn with_box_embedding(mut self, embedding: BoxEmbedding) -> Self {
175        self.box_embedding = Some(embedding);
176        self
177    }
178}
179
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance used when comparing computed volumes with expected values.
    const EPS: f32 = 1e-6;

    /// A unit square is valid, two-dimensional, and has volume 1.
    #[test]
    fn test_box_embedding_basic() {
        let unit_square = BoxEmbedding::new(vec![0.0, 0.0], vec![1.0, 1.0]);

        assert!(unit_square.is_valid());
        assert_eq!(unit_square.dim(), 2);

        let deviation = (unit_square.volume() - 1.0).abs();
        assert!(deviation < EPS);
    }

    /// Containment is directional: the larger box holds the smaller, not vice versa.
    #[test]
    fn test_box_embedding_containment() {
        let big = BoxEmbedding::new(vec![0.0, 0.0], vec![2.0, 2.0]);
        let small = BoxEmbedding::new(vec![0.5, 0.5], vec![1.5, 1.5]);

        assert!(big.contains_box(&small));
        assert!(!small.contains_box(&big));
    }

    /// Two 2x2 squares offset by (1, 1) share a 1x1 region.
    #[test]
    fn test_box_embedding_intersection() {
        let lower_left = BoxEmbedding::new(vec![0.0, 0.0], vec![2.0, 2.0]);
        let upper_right = BoxEmbedding::new(vec![1.0, 1.0], vec![3.0, 3.0]);

        let overlap = lower_left.intersection_volume(&upper_right);
        assert!((overlap - 1.0).abs() < EPS);
    }

    /// A box survives a JSON string round trip unchanged.
    #[test]
    fn test_box_embedding_serde() {
        let original = BoxEmbedding::new(vec![0.0, 1.0, 2.0], vec![1.0, 2.0, 3.0]);

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: BoxEmbedding = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original, decoded);
    }
}