codanna 0.9.19

Code Intelligence for Large Language Models
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
//! Type-safe wrappers and core types for vector search functionality.
//!
//! This module provides newtypes and error types following the project's
//! strict type safety guidelines. All types implement necessary traits
//! for ergonomic usage while preventing primitive obsession.

use std::num::NonZeroU32;
use thiserror::Error;

/// Standard vector dimension for code embeddings (all-MiniLM-L6-v2 model).
///
/// This is the value wrapped by [`VectorDimension::dimension_384`].
pub const VECTOR_DIMENSION_384: usize = 384;

/// Type-safe wrapper for vector IDs.
///
/// Uses `NonZeroU32` internally for space optimization and to ensure
/// vector IDs are never zero (which could indicate uninitialized state).
///
/// Every constructor and accessor is a `const fn`, so IDs can be built in
/// `const`/`static` items as well as at runtime.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct VectorId(NonZeroU32);

impl VectorId {
    /// Creates a new `VectorId` from a non-zero u32.
    ///
    /// Returns `None` if the provided ID is zero.
    #[must_use]
    pub const fn new(id: u32) -> Option<Self> {
        // `Option::map` takes a closure and cannot be called in a const
        // context, so the mapping is spelled out by hand.
        match NonZeroU32::new(id) {
            Some(raw) => Some(Self(raw)),
            None => None,
        }
    }

    /// Creates a new `VectorId` from a non-zero u32, panicking if zero.
    ///
    /// Despite the name, the argument *is* checked: a zero is reported with
    /// a panic, never with undefined behavior.
    ///
    /// # Panics
    /// Panics if `id` is zero. Use `new()` for fallible construction.
    #[must_use]
    pub const fn new_unchecked(id: u32) -> Self {
        match NonZeroU32::new(id) {
            Some(raw) => Self(raw),
            None => panic!("VectorId cannot be zero"),
        }
    }

    /// Returns the underlying u32 value (always non-zero).
    #[must_use]
    pub const fn get(&self) -> u32 {
        self.0.get()
    }

    /// Converts to little-endian bytes for storage.
    #[must_use]
    pub const fn to_bytes(&self) -> [u8; 4] {
        self.0.get().to_le_bytes()
    }

    /// Creates from little-endian bytes.
    ///
    /// Returns `None` if the bytes represent zero.
    #[must_use]
    pub const fn from_bytes(bytes: [u8; 4]) -> Option<Self> {
        Self::new(u32::from_le_bytes(bytes))
    }
}

/// Type-safe wrapper for cluster IDs in IVFFlat indexing.
///
/// Clusters are identified by non-zero IDs to prevent confusion
/// with uninitialized or error states.
///
/// Every constructor and accessor is a `const fn`, so IDs can be built in
/// `const`/`static` items as well as at runtime.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ClusterId(NonZeroU32);

impl ClusterId {
    /// Creates a new `ClusterId` from a non-zero u32.
    ///
    /// Returns `None` if the provided ID is zero.
    #[must_use]
    pub const fn new(id: u32) -> Option<Self> {
        // `Option::map` takes a closure and cannot be called in a const
        // context, so the mapping is spelled out by hand.
        match NonZeroU32::new(id) {
            Some(raw) => Some(Self(raw)),
            None => None,
        }
    }

    /// Creates a new `ClusterId` from a non-zero u32, panicking if zero.
    ///
    /// Despite the name, the argument *is* checked: a zero is reported with
    /// a panic, never with undefined behavior.
    ///
    /// # Panics
    /// Panics if `id` is zero. Use `new()` for fallible construction.
    #[must_use]
    pub const fn new_unchecked(id: u32) -> Self {
        match NonZeroU32::new(id) {
            Some(raw) => Self(raw),
            None => panic!("ClusterId cannot be zero"),
        }
    }

    /// Returns the underlying u32 value (always non-zero).
    #[must_use]
    pub const fn get(&self) -> u32 {
        self.0.get()
    }

    /// Converts to little-endian bytes for storage.
    #[must_use]
    pub const fn to_bytes(&self) -> [u8; 4] {
        self.0.get().to_le_bytes()
    }

    /// Creates from little-endian bytes.
    ///
    /// Returns `None` if the bytes represent zero.
    #[must_use]
    pub const fn from_bytes(bytes: [u8; 4]) -> Option<Self> {
        Self::new(u32::from_le_bytes(bytes))
    }
}

/// Type-safe wrapper for Tantivy segment ordinals.
///
/// Segment ordinals can be zero (for the first segment), so we use
/// a plain u32 rather than NonZeroU32.
///
/// Ordering and equality are derived and therefore follow the wrapped
/// integer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct SegmentOrdinal(u32);

impl SegmentOrdinal {
    /// Creates a new `SegmentOrdinal`.
    #[must_use]
    pub const fn new(ordinal: u32) -> Self {
        Self(ordinal)
    }

    /// Returns the underlying u32 value.
    #[must_use]
    pub const fn get(&self) -> u32 {
        self.0
    }

    /// Converts to little-endian bytes for storage.
    #[must_use]
    pub const fn to_bytes(&self) -> [u8; 4] {
        self.0.to_le_bytes()
    }

    /// Creates from little-endian bytes.
    #[must_use]
    pub const fn from_bytes(bytes: [u8; 4]) -> Self {
        Self(u32::from_le_bytes(bytes))
    }
}

impl std::fmt::Display for SegmentOrdinal {
    /// Formats the ordinal exactly like the wrapped `u32`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Delegate to the integer's own impl instead of `write!(f, "{}", ..)`:
        // that way width, fill, alignment and zero-padding requested by the
        // caller (e.g. `{:>5}` or `{:05}`) are honored rather than dropped.
        std::fmt::Display::fmt(&self.0, f)
    }
}

/// Similarity score newtype over `f32`.
///
/// Values built through [`Score::new`] lie in the closed interval
/// [0.0, 1.0]:
/// - 1.0 means perfect similarity
/// - 0.0 means no similarity
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Score(f32);

impl Score {
    /// Validates `value` and wraps it in a `Score`.
    ///
    /// # Errors
    /// Returns `VectorError::InvalidScore` when `value` is NaN or lies
    /// outside [0.0, 1.0]; the `reason` field says which check failed.
    pub fn new(value: f32) -> Result<Self, VectorError> {
        // NaN is tested first so it gets its own, more specific reason.
        let reason = match value {
            v if v.is_nan() => "Score cannot be NaN",
            v if (0.0..=1.0).contains(&v) => return Ok(Self(v)),
            _ => "Score must be in range [0.0, 1.0]",
        };
        Err(VectorError::InvalidScore { value, reason })
    }

    /// The lowest score, 0.0 (no similarity).
    #[must_use]
    pub const fn zero() -> Self {
        Self(0.0)
    }

    /// The highest score, 1.0 (perfect similarity).
    #[must_use]
    pub const fn one() -> Self {
        Self(1.0)
    }

    /// Unwraps the score into its raw `f32`.
    #[must_use]
    pub fn get(&self) -> f32 {
        let Self(raw) = *self;
        raw
    }

    /// Blends this score with `other` as a weighted average.
    ///
    /// # Arguments
    /// * `other` - The score to blend with
    /// * `weight` - Share given to `self` (0.0 to 1.0); `other` receives
    ///   the remaining `1.0 - weight`.
    ///
    /// # Errors
    /// Returns `VectorError::InvalidWeight` if `weight` is NaN or outside
    /// [0.0, 1.0].
    pub fn weighted_combine(&self, other: Score, weight: f32) -> Result<Self, VectorError> {
        let weight_is_valid = !weight.is_nan() && (0.0..=1.0).contains(&weight);
        if !weight_is_valid {
            return Err(VectorError::InvalidWeight {
                value: weight,
                reason: "Weight must be in range [0.0, 1.0] and not NaN",
            });
        }

        let own_share = self.0 * weight;
        let other_share = other.0 * (1.0 - weight);
        Ok(Self(own_share + other_share))
    }
}

// `f32` is only `PartialEq` because of NaN; `Score::new` rejects NaN, which
// is what makes treating scores as totally ordered reasonable.
impl Eq for Score {}

impl PartialOrd for Score {
    /// Always `Some`: defers to the total order defined by `Ord`.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl Ord for Score {
    /// Compares the wrapped floats.
    ///
    /// # Panics
    /// Panics if the two values are unordered, i.e. if either is NaN.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        match self.0.partial_cmp(&other.0) {
            Some(ordering) => ordering,
            None => panic!("Score values should never be NaN"),
        }
    }
}

/// Newtype over `usize` describing how many components a vector has.
///
/// Carrying the dimension as its own type lets vector lengths be checked
/// against it (see [`VectorDimension::validate_vector`]) so mismatches are
/// caught before any operation uses the data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct VectorDimension(usize);

impl VectorDimension {
    /// Wraps `dim` after checking that it is non-zero.
    ///
    /// # Errors
    /// Returns `VectorError::InvalidDimension` when `dim` is zero.
    pub fn new(dim: usize) -> Result<Self, VectorError> {
        match dim {
            0 => Err(VectorError::InvalidDimension {
                dimension: dim,
                reason: "Vector dimension cannot be zero",
            }),
            nonzero => Ok(Self(nonzero)),
        }
    }

    /// The standard 384-component dimension (`VECTOR_DIMENSION_384`).
    #[must_use]
    pub const fn dimension_384() -> Self {
        Self(VECTOR_DIMENSION_384)
    }

    /// Unwraps the dimension into a plain `usize`.
    #[must_use]
    pub const fn get(&self) -> usize {
        self.0
    }

    /// Checks that `vector` has exactly this many components.
    ///
    /// # Errors
    /// Returns `VectorError::DimensionMismatch` carrying the expected and
    /// actual lengths when they differ.
    pub fn validate_vector(&self, vector: &[f32]) -> Result<(), VectorError> {
        let (expected, actual) = (self.0, vector.len());
        if actual == expected {
            Ok(())
        } else {
            Err(VectorError::DimensionMismatch { expected, actual })
        }
    }
}

/// Errors that can occur during vector operations.
///
/// `Display` text comes from the `#[error(...)]` attribute on each variant.
/// Most messages append a `Suggestion:` line with an actionable hint; the
/// value-validation variants (`InvalidDimension`, `InvalidScore`,
/// `InvalidWeight`) append a `Reason:` line instead.
#[derive(Error, Debug)]
pub enum VectorError {
    /// A vector's length differed from the dimension it was checked against
    /// (returned by `VectorDimension::validate_vector`).
    #[error(
        "Vector dimension mismatch: expected {expected}, got {actual}\nSuggestion: Ensure all vectors use the same embedding model"
    )]
    DimensionMismatch { expected: usize, actual: usize },

    /// A dimension value was rejected (returned by `VectorDimension::new`
    /// for zero); `reason` explains why.
    #[error("Invalid vector dimension: {dimension}\nReason: {reason}")]
    InvalidDimension {
        dimension: usize,
        reason: &'static str,
    },

    /// A score value was rejected (returned by `Score::new` for NaN or
    /// out-of-range input); `reason` explains which check failed.
    #[error("Invalid score value: {value}\nReason: {reason}")]
    InvalidScore { value: f32, reason: &'static str },

    /// Cache warming failed; the string is interpolated into the message as
    /// the failure detail.
    #[error(
        "Cache warming failed: {0}\nSuggestion: Check disk space and permissions for cache directory"
    )]
    CacheWarming(String),

    /// A cluster ID was not usable; carries the offending raw ID.
    #[error(
        "Invalid cluster ID: {0}\nSuggestion: Ensure clustering has been performed before assigning vectors"
    )]
    InvalidClusterId(u32),

    /// Wraps a `std::io::Error`. The `#[from]` attribute derives the `From`
    /// conversion, so `?` on an I/O result produces this variant.
    #[error("Storage error: {0}\nSuggestion: Check disk space and file permissions")]
    Storage(#[from] std::io::Error),

    /// Embedding generation failed; the string is interpolated into the
    /// message as the failure detail.
    #[error(
        "Embedding generation failed: {0}\nSuggestion: Verify the embedding model is properly initialized"
    )]
    EmbeddingFailed(String),

    /// Clustering failed; the string is interpolated into the message as the
    /// failure detail.
    #[error(
        "Clustering failed: {0}\nSuggestion: Ensure sufficient vectors are available for clustering (minimum: k clusters)"
    )]
    ClusteringFailed(String),

    /// Serialization failed; the string is interpolated into the message as
    /// the failure detail.
    #[error(
        "Serialization error: {0}\nSuggestion: Check that vector data is valid and not corrupted"
    )]
    Serialization(String),

    /// No vector exists for the given ID; carries the raw ID that was looked
    /// up (presumably a `VectorId` value; confirm against callers).
    #[error("Vector not found: ID {0}\nSuggestion: Verify the vector was properly indexed")]
    VectorNotFound(u32),

    /// A blend weight was rejected (returned by `Score::weighted_combine`
    /// for NaN or out-of-range input); `reason` explains why.
    #[error("Invalid weight value: {value}\nReason: {reason}")]
    InvalidWeight { value: f32, reason: &'static str },

    /// The stored format version (`actual`) differs from the one this code
    /// expects (`expected`).
    #[error(
        "Invalid storage version: expected {expected}, got {actual}\nSuggestion: Migrate the storage format or use a compatible version"
    )]
    VersionMismatch { expected: u32, actual: u32 },
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `VectorId` accepts non-zero values and rejects zero.
    #[test]
    fn test_vector_id_construction() {
        // Fallible constructor: non-zero succeeds...
        assert_eq!(VectorId::new(42).map(|id| id.get()), Some(42));

        // ...and zero is refused.
        assert_eq!(VectorId::new(0), None);

        // Panicking constructor with a valid input.
        assert_eq!(VectorId::new_unchecked(100).get(), 100);
    }

    /// The panicking constructor reports zero with the documented message.
    #[test]
    #[should_panic(expected = "VectorId cannot be zero")]
    fn test_vector_id_unchecked_panic() {
        let _ = VectorId::new_unchecked(0);
    }

    /// Bytes produced by `to_bytes` decode back to the same ID.
    #[test]
    fn test_vector_id_serialization() {
        let original = VectorId::new(12345).unwrap();
        let round_tripped = VectorId::from_bytes(original.to_bytes());
        assert_eq!(round_tripped, Some(original));
    }

    /// `ClusterId` accepts non-zero values and rejects zero.
    #[test]
    fn test_cluster_id_construction() {
        assert_eq!(ClusterId::new(1).map(|id| id.get()), Some(1));
        assert_eq!(ClusterId::new(0), None);
    }

    /// `SegmentOrdinal` allows zero and orders by its numeric value.
    #[test]
    fn test_segment_ordinal() {
        let first = SegmentOrdinal::new(0);
        let later = SegmentOrdinal::new(42);

        assert_eq!(first.get(), 0);
        assert_eq!(later.get(), 42);

        // Derived ordering follows the wrapped integer.
        assert!(first < later);
    }

    /// `Score` accepts [0.0, 1.0] and rejects everything else, NaN included.
    #[test]
    fn test_score_validation() {
        // Accepted values.
        let mid = Score::new(0.5).unwrap();
        assert_eq!(mid.get(), 0.5);
        assert_eq!(Score::zero().get(), 0.0);
        assert_eq!(Score::one().get(), 1.0);

        // Rejected values: below range, above range, NaN.
        for rejected in [-0.1, 1.1, f32::NAN] {
            assert!(Score::new(rejected).is_err());
        }
    }

    /// Weighted blend: 0.8 * 0.7 + 0.6 * 0.3 = 0.74.
    #[test]
    fn test_score_combining() {
        let primary = Score::new(0.8).unwrap();
        let secondary = Score::new(0.6).unwrap();

        let blended = primary.weighted_combine(secondary, 0.7).unwrap();
        let delta = (blended.get() - 0.74).abs();
        assert!(delta < f32::EPSILON);
    }

    /// `VectorDimension` rejects zero and checks slice lengths.
    #[test]
    fn test_vector_dimension() {
        let dim = VectorDimension::new(384).unwrap();
        assert_eq!(dim.get(), 384);
        assert_eq!(VectorDimension::dimension_384().get(), 384);

        // Zero is not a valid dimension.
        assert!(VectorDimension::new(0).is_err());

        // Length validation: exact match passes, anything else fails.
        let matching = vec![0.1; 384];
        let too_short = vec![0.1; 100];
        assert!(dim.validate_vector(&matching).is_ok());
        assert!(dim.validate_vector(&too_short).is_err());
    }
}