use crate::vsa::{SparseVec, DIM};
use crate::VsaError;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// A 64-bit word packing 38 balanced-ternary data trits (low 61 bits)
/// together with a 3-bit metadata tag (bits 61..64).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct BalancedTernaryWord {
    packed: u64,
}

/// Tag stored in the top three bits of a `BalancedTernaryWord`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum WordMetadata {
    Data = 0b000,
    SemanticOutlier = 0b001,
    Residual = 0b010,
    Continuation = 0b011,
    EndOfSequence = 0b100,
    Parity = 0b101,
}

impl BalancedTernaryWord {
    /// Largest encodable value: (3^38 - 1) / 2.
    pub const MAX_VALUE: i64 = 675_425_858_836_496_044;
    /// Smallest encodable value: -(3^38 - 1) / 2.
    pub const MIN_VALUE: i64 = -675_425_858_836_496_044;

    /// Number of data trits packed into the low 61 bits.
    pub const DATA_TRITS: usize = 38;

    /// Width of the metadata field in trits; the six tag values fit in two
    /// trits, stored here as a 3-bit binary field.
    pub const META_TRITS: usize = 2;

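    /// Packs `value` into 38 balanced-ternary data trits (low 61 bits) and
    /// tags it with `metadata` (bits 61..64); values outside
    /// `[MIN_VALUE, MAX_VALUE]` are rejected.
    ///
    /// A minimal round-trip sketch; the `embeddenator` crate path is an
    /// assumption inferred from this module's hash domain strings:
    ///
    /// ```ignore
    /// use embeddenator::codebook::{BalancedTernaryWord, WordMetadata};
    ///
    /// let word = BalancedTernaryWord::new(-42, WordMetadata::Data).unwrap();
    /// assert_eq!(word.decode(), -42);
    /// assert_eq!(word.metadata(), WordMetadata::Data);
    /// ```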
    pub fn new(value: i64, metadata: WordMetadata) -> Result<Self, VsaError> {
        if !(Self::MIN_VALUE..=Self::MAX_VALUE).contains(&value) {
            return Err(VsaError::ValueOutOfRange {
                value,
                min: Self::MIN_VALUE,
                max: Self::MAX_VALUE,
            });
        }

        let encoded = Self::encode_balanced_ternary(value);
        let meta_bits = (metadata as u64) << 61;

        Ok(BalancedTernaryWord {
            packed: encoded | meta_bits,
        })
    }

    /// Wraps an already-packed word without validation.
    pub fn from_raw(packed: u64) -> Self {
        BalancedTernaryWord { packed }
    }

    /// Returns the raw packed representation.
    pub fn raw(&self) -> u64 {
        self.packed
    }

    /// Returns the low 61 data bits with the metadata tag masked off.
    pub fn data_bits(&self) -> u64 {
        self.packed & 0x1FFF_FFFF_FFFF_FFFF
    }

    /// Extracts the metadata tag from bits 61..64. Unknown tag values fall
    /// back to `WordMetadata::Data`.
    pub fn metadata(&self) -> WordMetadata {
        match (self.packed >> 61) & 0b111 {
            0b000 => WordMetadata::Data,
            0b001 => WordMetadata::SemanticOutlier,
            0b010 => WordMetadata::Residual,
            0b011 => WordMetadata::Continuation,
            0b100 => WordMetadata::EndOfSequence,
            0b101 => WordMetadata::Parity,
            _ => WordMetadata::Data,
        }
    }

    /// Decodes the data trits back into the signed integer they encode.
    pub fn decode(&self) -> i64 {
        Self::decode_balanced_ternary(self.data_bits())
    }

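    // Worked example: encode 5. In balanced ternary 5 is (1, -1, -1), i.e.
    // 9 - 3 - 1. With the digit mapping {-1 -> 2, 0 -> 0, +1 -> 1} the packed
    // base-3 value is 2*1 + 2*3 + 1*9 = 17:
    //
    //   v = 5: rem 2 -> trit -1 (digit 2), carry makes v = 2
    //   v = 2: rem 2 -> trit -1 (digit 2), carry makes v = 1
    //   v = 1: rem 1 -> trit +1 (digit 1), v = 0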
    fn encode_balanced_ternary(value: i64) -> u64 {
        let mut v = value;
        let mut result: u64 = 0;
        let mut power: u64 = 1;

        for _ in 0..Self::DATA_TRITS {
            let mut rem = v % 3;
            v /= 3;

            // Fold remainders of ±2 into the balanced range {-1, 0, 1},
            // carrying into the next trit.
            if rem == 2 {
                rem = -1;
                v += 1;
            } else if rem == -2 {
                rem = 1;
                v -= 1;
            }

            // Digit mapping: 0 -> 0, +1 -> 1, -1 -> 2.
            let encoded = match rem {
                -1 => 2u64,
                0 => 0u64,
                1 => 1u64,
                _ => unreachable!("rem is folded into -1..=1 above"),
            };

            result += encoded * power;
            power *= 3;
        }

        result
    }

    fn decode_balanced_ternary(packed: u64) -> i64 {
        let mut result: i64 = 0;
        let mut power: i64 = 1;
        let mut remaining = packed;

        for _ in 0..Self::DATA_TRITS {
            let trit = remaining % 3;
            remaining /= 3;

            match trit {
                0 => {}
                1 => result += power,
                2 => result -= power,
                _ => unreachable!(),
            }
            power *= 3;
        }

        result
    }

    /// Negates a packed trit string by swapping the +1 and -1 digit codes;
    /// the result decodes to the negation of the input's value.
    #[allow(dead_code)]
    fn negate_trits(packed: u64) -> u64 {
        let mut result: u64 = 0;
        let mut remaining = packed;
        let mut power: u64 = 1;

        for _ in 0..Self::DATA_TRITS {
            let trit = remaining % 3;
            remaining /= 3;

            let negated = match trit {
                0 => 0,
                1 => 2,
                2 => 1,
                _ => unreachable!(),
            };
            result += negated * power;
            power *= 3;
        }

        result
    }

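    /// Computes a balanced check trit in `{-1, 0, 1}` chosen so that the
    /// word's trit-sum plus the check trit is ≡ 0 (mod 3); any single-trit
    /// corruption therefore changes the parity.
    ///
    /// Sketch (same assumed paths as the example on `new`):
    ///
    /// ```ignore
    /// let word = BalancedTernaryWord::new(12345, WordMetadata::Data).unwrap();
    /// assert!((-1..=1).contains(&word.compute_parity()));
    /// ```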
    pub fn compute_parity(&self) -> i8 {
        let mut sum: i64 = 0;
        let mut remaining = self.data_bits();

        for _ in 0..Self::DATA_TRITS {
            let trit = (remaining % 3) as i64;
            remaining /= 3;

            sum += match trit {
                0 => 0,
                1 => 1,
                2 => -1,
                _ => 0,
            };
        }

        // Express the check digit as a balanced trit: a raw digit of 2
        // corresponds to -1. (Returning the raw digit 2 here would violate
        // the documented -1..=1 range.)
        match (3 - sum.rem_euclid(3)) % 3 {
            0 => 0,
            1 => 1,
            2 => -1,
            _ => unreachable!(),
        }
    }
}

/// A high-entropy region detected during projection, stored verbatim so it
/// can be patched back in during reconstruction.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SemanticOutlier {
    /// Byte offset of the window in the original data.
    pub position: usize,
    /// Length of the window in bytes.
    pub length: usize,
    /// Shannon entropy of the window, in bits per byte.
    pub entropy_score: f64,
    /// The raw window bytes packed into balanced-ternary words.
    pub encoded_pattern: Vec<BalancedTernaryWord>,
    /// Hypervector fingerprint of the window.
    pub semantic_vec: SparseVec,
}

/// A named basis hypervector in the codebook.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BasisVector {
    pub id: u32,
    pub vector: SparseVec,
    pub label: Option<String>,
    pub weight: f64,
}

/// A collection of basis hypervectors that byte streams are projected onto.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Codebook {
    pub version: u32,

    pub dimensionality: usize,

    pub basis_vectors: Vec<BasisVector>,

    pub semantic_markers: Vec<SparseVec>,

    pub statistics: CodebookStatistics,

    /// Optional salt mixed into every seed derivation, so codebooks with
    /// different salts produce unrelated basis vectors.
    pub salt: Option<[u8; 32]>,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct CodebookStatistics {
    pub total_bytes_encoded: u64,
    pub avg_compression_ratio: f64,
    pub outlier_count: u64,
    pub coefficient_histogram: [u64; 16],
}

/// Tuning knobs for `Codebook::project_with_config`.
#[derive(Clone, Debug)]
pub struct ProjectionConfig {
    /// Bytes per projection chunk.
    pub chunk_size: usize,
    /// Minimum cosine similarity for a basis vector to contribute.
    pub similarity_threshold: f64,
    /// Maximum basis matches kept per chunk.
    pub max_basis_matches: usize,
    /// Scale applied to similarities before integer encoding.
    pub coefficient_scale: f64,
    /// Stride used to derive a coefficient key from (basis id, chunk index).
    pub coefficient_key_spacing: u32,
}

impl Default for ProjectionConfig {
    fn default() -> Self {
        Self {
            chunk_size: 64,
            similarity_threshold: 0.3,
            max_basis_matches: 4,
            coefficient_scale: 1000.0,
            coefficient_key_spacing: 1000,
        }
    }
}

/// Options for `Codebook::train`.
#[derive(Clone, Debug)]
pub struct CodebookTrainingConfig {
    /// Maximum number of learned pattern basis vectors to add.
    pub max_basis_vectors: usize,
    /// Minimum n-gram frequency for a pattern to earn a basis vector.
    pub min_frequency: u64,
    /// Whether to install the 256 per-byte basis vectors first.
    pub include_byte_basis: bool,
    /// Whether to install the 64 positional basis vectors.
    pub include_position_basis: bool,
}

impl Default for CodebookTrainingConfig {
    fn default() -> Self {
        Self {
            max_basis_vectors: 512,
            min_frequency: 5,
            include_byte_basis: true,
            include_position_basis: true,
        }
    }
}

/// Output of projecting a byte stream onto a codebook.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ProjectionResult {
    /// Coefficient words keyed by `basis_id * coefficient_key_spacing + chunk_idx`.
    pub coefficients: HashMap<u32, BalancedTernaryWord>,
    /// Per-byte corrections applied on top of the basis reconstruction.
    pub residual: Vec<BalancedTernaryWord>,
    /// High-entropy regions stored verbatim.
    pub outliers: Vec<SemanticOutlier>,
    /// Heuristic estimate of how well the basis captured the data,
    /// clamped to [0.1, 1.0].
    pub quality_score: f64,
}

impl Default for Codebook {
    fn default() -> Self {
        Self::new(DIM)
    }
}

impl Codebook {
    pub fn new(dimensionality: usize) -> Self {
        Codebook {
            version: 1,
            dimensionality,
            basis_vectors: Vec::new(),
            semantic_markers: Vec::new(),
            statistics: CodebookStatistics::default(),
            salt: None,
        }
    }

    pub fn with_salt(dimensionality: usize, salt: [u8; 32]) -> Self {
        let mut codebook = Self::new(dimensionality);
        codebook.salt = Some(salt);
        codebook
    }

    /// Seeds the codebook with a small hand-picked basis: common byte runs,
    /// frequent English n-grams, and well-known file-format magic numbers.
    pub fn initialize_standard_basis(&mut self) {
        self.add_basis_for_pattern(0, b"\x00\x00\x00\x00", "zero_run");

        self.add_basis_for_pattern(1, b" ", "space_run");
        self.add_basis_for_pattern(2, b"\n\n", "newline_pair");

        self.add_basis_for_pattern(3, b"the ", "the_space");
        self.add_basis_for_pattern(4, b"ing ", "ing_space");
        self.add_basis_for_pattern(5, b"tion", "tion");

        self.add_basis_for_pattern(6, b"\x89PNG", "png_header");
        self.add_basis_for_pattern(7, b"\xFF\xD8\xFF", "jpeg_header");
        self.add_basis_for_pattern(8, b"PK\x03\x04", "zip_header");

        self.initialize_semantic_markers();
    }

    /// Installs the full 256-entry byte basis plus the 64 positional basis
    /// vectors.
    pub fn initialize_byte_basis(&mut self) {
        self.initialize_byte_basis_with_config(true);
    }

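    /// Installs one deterministic basis vector per byte value (ids 0..=255)
    /// and, optionally, one per chunk position (ids 256..=319). Every seed is
    /// a domain-separated SHA-256 digest:
    ///
    /// ```text
    /// seed = SHA-256("embeddenator:byte_basis:v1:" || byte || dim_le64 || salt?)
    /// ```
    ///
    /// so identical (byte, dimensionality, salt) inputs always produce the
    /// same basis vector across runs and machines.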
    pub fn initialize_byte_basis_with_config(&mut self, include_position_basis: bool) {
        use sha2::{Digest, Sha256};

        for byte_val in 0u8..=255 {
            let mut hasher = Sha256::new();
            hasher.update(b"embeddenator:byte_basis:v1:");
            hasher.update([byte_val]);
            hasher.update((self.dimensionality as u64).to_le_bytes());
            if let Some(salt) = &self.salt {
                hasher.update(salt);
            }
            let hash = hasher.finalize();
            let seed: [u8; 32] = hash.into();

            let vector = SparseVec::from_seed(&seed, self.dimensionality);
            self.basis_vectors.push(BasisVector {
                id: byte_val as u32,
                vector,
                label: Some(format!("byte_{:02x}", byte_val)),
                weight: 1.0,
            });
        }

        if include_position_basis {
            for pos in 0..64 {
                let mut hasher = Sha256::new();
                hasher.update(b"embeddenator:position_basis:v1:");
                hasher.update((pos as u64).to_le_bytes());
                hasher.update((self.dimensionality as u64).to_le_bytes());
                if let Some(salt) = &self.salt {
                    hasher.update(salt);
                }
                let hash = hasher.finalize();
                let seed: [u8; 32] = hash.into();

                let vector = SparseVec::from_seed(&seed, self.dimensionality);
                self.basis_vectors.push(BasisVector {
                    id: 256 + pos as u32,
                    vector,
                    label: Some(format!("pos_{}", pos)),
                    weight: 1.0,
                });
            }
        }
    }

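    /// Learns a basis from a corpus: optionally installs the byte/position
    /// bases, counts n-grams of sizes 2, 3, 4, 6, and 8, then adds one basis
    /// vector (ids 1000 and up) per sufficiently frequent pattern. Returns
    /// the number of basis vectors added.
    ///
    /// A usage sketch (assumed paths as above):
    ///
    /// ```ignore
    /// let mut codebook = Codebook::default();
    /// let corpus: Vec<&[u8]> =
    ///     vec![&b"the cat sat on the mat"[..], &b"the dog sat on the log"[..]];
    /// let added = codebook.train(&corpus, &CodebookTrainingConfig::default());
    /// assert!(added >= 320); // 256 byte vectors + 64 position vectors
    /// ```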
    pub fn train(&mut self, training_data: &[&[u8]], config: &CodebookTrainingConfig) -> usize {
        let mut added = 0;
        if config.include_byte_basis {
            self.initialize_byte_basis_with_config(config.include_position_basis);
            added += 256;
            if config.include_position_basis {
                added += 64;
            }
        }

        // Count n-grams of several sizes across the whole corpus.
        let mut ngram_counts: HashMap<Vec<u8>, u64> = HashMap::new();

        for data in training_data {
            for window_size in &[2usize, 3, 4, 6, 8] {
                if data.len() >= *window_size {
                    for window in data.windows(*window_size) {
                        *ngram_counts.entry(window.to_vec()).or_insert(0) += 1;
                    }
                }
            }
        }

        // Most frequent patterns first.
        let mut patterns: Vec<(Vec<u8>, u64)> = ngram_counts.into_iter().collect();
        patterns.sort_by(|a, b| b.1.cmp(&a.1));

        // Learned pattern ids start above the byte/position id ranges.
        const PATTERN_ID_START: u32 = 1000;
        let mut pattern_id = PATTERN_ID_START;
        for (pattern, count) in patterns.iter().take(config.max_basis_vectors) {
            if *count >= config.min_frequency && pattern.len() >= 2 {
                let label = format!("pattern_{:02x}_{}_freq{}", pattern[0], pattern.len(), count);
                self.add_basis_for_pattern(pattern_id, pattern, &label);
                pattern_id += 1;
                added += 1;
            }
        }

        self.statistics.total_bytes_encoded = training_data.iter().map(|d| d.len() as u64).sum();

        added
    }

    /// Reads each file and trains on the combined corpus; unreadable files
    /// are skipped rather than failing the whole run.
    pub fn train_from_files(
        &mut self,
        paths: &[&std::path::Path],
        config: &CodebookTrainingConfig,
    ) -> std::io::Result<usize> {
        let mut training_data: Vec<Vec<u8>> = Vec::new();

        for path in paths {
            if let Ok(data) = std::fs::read(path) {
                training_data.push(data);
            }
        }

        let refs: Vec<&[u8]> = training_data.iter().map(|v| v.as_slice()).collect();
        Ok(self.train(&refs, config))
    }

    fn add_basis_for_pattern(&mut self, id: u32, pattern: &[u8], label: &str) {
        use sha2::{Digest, Sha256};

        let mut hasher = Sha256::new();
        hasher.update(pattern);
        if let Some(salt) = &self.salt {
            hasher.update(salt);
        }
        let hash = hasher.finalize();

        let seed: [u8; 32] = hash.into();
        let vector = SparseVec::from_seed(&seed, self.dimensionality);

        self.basis_vectors.push(BasisVector {
            id,
            vector,
            label: Some(label.to_string()),
            weight: 1.0,
        });
    }

    fn initialize_semantic_markers(&mut self) {
        use sha2::{Digest, Sha256};

        let seed_for = |label: &str| -> [u8; 32] {
            let mut hasher = Sha256::new();
            hasher.update(b"embeddenator:semantic_marker:v1:");
            hasher.update(label.as_bytes());
            hasher.update((self.dimensionality as u64).to_le_bytes());
            if let Some(salt) = &self.salt {
                hasher.update(salt);
            }
            hasher.finalize().into()
        };

        let seed = seed_for("high_entropy");
        self.semantic_markers
            .push(SparseVec::from_seed(&seed, self.dimensionality));

        let seed = seed_for("repetition");
        self.semantic_markers
            .push(SparseVec::from_seed(&seed, self.dimensionality));

        let seed = seed_for("boundary");
        self.semantic_markers
            .push(SparseVec::from_seed(&seed, self.dimensionality));
    }

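    /// Projects `data` onto the basis using the default `ProjectionConfig`.
    ///
    /// A projection/reconstruction sketch (assumed paths as above):
    ///
    /// ```ignore
    /// let mut codebook = Codebook::default();
    /// codebook.initialize_standard_basis();
    ///
    /// let data = b"the quick brown fox";
    /// let projection = codebook.project(data);
    /// assert!(projection.quality_score > 0.0);
    ///
    /// let restored = codebook.reconstruct(&projection, data.len());
    /// assert_eq!(restored.len(), data.len());
    /// ```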
    pub fn project(&self, data: &[u8]) -> ProjectionResult {
        self.project_with_config(data, &ProjectionConfig::default())
    }

    pub fn project_with_config(&self, data: &[u8], config: &ProjectionConfig) -> ProjectionResult {
        let mut coefficients = HashMap::new();
        let mut residual = Vec::new();
        let mut outliers = Vec::new();

        let detected_outliers = self.detect_semantic_outliers(data);
        outliers.extend(detected_outliers);

        let chunk_size = config.chunk_size;
        for (chunk_idx, chunk) in data.chunks(chunk_size).enumerate() {
            let chunk_vec = SparseVec::from_bytes(chunk);

            // Score every basis vector against this chunk and keep those
            // above the similarity threshold.
            let mut best_matches: Vec<(u32, f64)> = self
                .basis_vectors
                .iter()
                .map(|basis| (basis.id, chunk_vec.cosine(&basis.vector)))
                .filter(|(_, sim)| *sim > config.similarity_threshold)
                .collect();

            best_matches.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Less));

            for (basis_id, similarity) in best_matches.iter().take(config.max_basis_matches) {
                let coef_value = (*similarity * config.coefficient_scale) as i64;
                if let Ok(word) = BalancedTernaryWord::new(coef_value, WordMetadata::Data) {
                    coefficients.insert(
                        *basis_id * config.coefficient_key_spacing + chunk_idx as u32,
                        word,
                    );
                }
            }

            // Whatever the basis reconstruction misses is kept per byte as a
            // residual correction. Note that `reconstruct_chunk` assumes the
            // default key spacing and coefficient scale.
            let reconstructed = self.reconstruct_chunk(&coefficients, chunk_idx, chunk.len());
            let chunk_residual = self.compute_residual(chunk, &reconstructed);

            for residual_byte in chunk_residual {
                if let Ok(word) =
                    BalancedTernaryWord::new(residual_byte as i64, WordMetadata::Residual)
                {
                    residual.push(word);
                }
            }
        }

        let quality_score = self.calculate_quality_score(data, &coefficients, &residual);

        ProjectionResult {
            coefficients,
            residual,
            outliers,
            quality_score,
        }
    }

    fn detect_semantic_outliers(&self, data: &[u8]) -> Vec<SemanticOutlier> {
        let mut outliers = Vec::new();
        let window_size = 32;

        if data.len() < window_size {
            return outliers;
        }

        for i in 0..data.len() - window_size {
            let window = &data[i..i + window_size];
            let entropy = self.calculate_entropy(window);

            // Treat near-random windows (more than 7.5 bits/byte) as
            // outliers and store their bytes verbatim.
            if entropy > 7.5 {
                let pattern_vec = SparseVec::from_bytes(window);

                // Pack the window little-endian, 7 bytes per word: 7 bytes
                // (56 bits) always fit in 38 trits, whereas 8 bytes can
                // overflow both i64 and the encodable range, silently
                // dropping data.
                let mut encoded_pattern = Vec::new();
                for chunk in window.chunks(7) {
                    let value = chunk
                        .iter()
                        .enumerate()
                        .fold(0i64, |acc, (j, &b)| acc + ((b as i64) << (j * 8)));
                    if let Ok(word) =
                        BalancedTernaryWord::new(value, WordMetadata::SemanticOutlier)
                    {
                        encoded_pattern.push(word);
                    }
                }

                outliers.push(SemanticOutlier {
                    position: i,
                    length: window_size,
                    entropy_score: entropy,
                    encoded_pattern,
                    semantic_vec: pattern_vec,
                });
            }
        }

        // Collapse detections that start within half a window of each other.
        outliers.dedup_by(|a, b| a.position.abs_diff(b.position) < window_size / 2);

        outliers
    }

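    /// Shannon entropy of the byte distribution in bits per byte,
    /// `H = -Σ p(b) · log2 p(b)`, ranging from 0.0 for constant data to 8.0
    /// for uniformly random bytes.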
    fn calculate_entropy(&self, data: &[u8]) -> f64 {
        let mut counts = [0u32; 256];
        for &byte in data {
            counts[byte as usize] += 1;
        }

        let len = data.len() as f64;
        counts
            .iter()
            .filter(|&&c| c > 0)
            .map(|&c| {
                let p = c as f64 / len;
                -p * p.log2()
            })
            .sum()
    }

    fn reconstruct_chunk(
        &self,
        coefficients: &HashMap<u32, BalancedTernaryWord>,
        chunk_idx: usize,
        chunk_len: usize,
    ) -> Vec<u8> {
        if chunk_len == 0 || coefficients.is_empty() || self.basis_vectors.is_empty() {
            return vec![0u8; chunk_len];
        }

        // Assumes the default key spacing and coefficient scale; projections
        // made with a custom `ProjectionConfig` are not recoverable here.
        let config = ProjectionConfig::default();
        let key_spacing = config.coefficient_key_spacing;
        let coef_scale = config.coefficient_scale;

        let mut reconstruction: Vec<i32> = vec![0i32; chunk_len];

        for basis in &self.basis_vectors {
            let key = basis.id * key_spacing + chunk_idx as u32;
            if let Some(coef_word) = coefficients.get(&key) {
                let coef_value = coef_word.decode();
                let weight = coef_value as f64 / coef_scale;

                let chunk_weight = (weight * 128.0) as i32;

                // Fold the sparse hypervector back onto chunk positions.
                for &idx in &basis.vector.pos {
                    let pos = idx % chunk_len;
                    reconstruction[pos] = reconstruction[pos].saturating_add(chunk_weight);
                }
                for &idx in &basis.vector.neg {
                    let pos = idx % chunk_len;
                    reconstruction[pos] = reconstruction[pos].saturating_sub(chunk_weight);
                }
            }
        }

        reconstruction
            .iter()
            .map(|&val| val.clamp(0, 255) as u8)
            .collect()
    }

    /// Byte-wise difference `original - reconstructed` (mod 256), so that
    /// adding it back onto a reconstruction recovers the original exactly.
    fn compute_residual(&self, original: &[u8], reconstructed: &[u8]) -> Vec<u8> {
        original
            .iter()
            .zip(reconstructed.iter())
            .map(|(&o, &r)| o.wrapping_sub(r))
            .collect()
    }

    fn calculate_quality_score(
        &self,
        original: &[u8],
        coefficients: &HashMap<u32, BalancedTernaryWord>,
        _residual: &[BalancedTernaryWord],
    ) -> f64 {
        if original.is_empty() {
            return 1.0;
        }

        if coefficients.is_empty() {
            return 0.1;
        }

        let config = ProjectionConfig::default();

        // Average recovered similarity across all stored coefficients.
        let total_coef_magnitude: f64 = coefficients
            .values()
            .map(|word| {
                let val = word.decode() as f64;
                (val / config.coefficient_scale).abs()
            })
            .sum();

        let avg_similarity = total_coef_magnitude / coefficients.len() as f64;

        // Fraction of chunks that received at least one coefficient.
        let chunk_count = original.len().div_ceil(config.chunk_size);
        let key_spacing = config.coefficient_key_spacing;

        let chunks_with_coefs: std::collections::HashSet<u32> =
            coefficients.keys().map(|&key| key % key_spacing).collect();

        let coverage_ratio = chunks_with_coefs.len() as f64 / chunk_count.max(1) as f64;

        // Equal-weight blend of similarity and coverage, floored at 0.1.
        let quality = (avg_similarity * 0.5 + coverage_ratio * 0.5).min(1.0);

        quality.max(0.1)
    }

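    /// Inverts a projection: rebuilds each 64-byte chunk from the
    /// coefficients, applies the per-byte residual corrections, then patches
    /// the verbatim outlier windows back in, truncating to `expected_size`.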
    pub fn reconstruct(&self, projection: &ProjectionResult, expected_size: usize) -> Vec<u8> {
        let mut result = Vec::with_capacity(expected_size);

        // Must match the chunk size used at projection time (the default).
        let chunk_size = 64;
        let num_chunks = expected_size.div_ceil(chunk_size);

        for chunk_idx in 0..num_chunks {
            let chunk = self.reconstruct_chunk(&projection.coefficients, chunk_idx, chunk_size);
            result.extend(chunk);
        }

        // Apply the per-byte residual corrections.
        for (i, residual_word) in projection.residual.iter().enumerate() {
            if i < result.len() {
                let correction = residual_word.decode() as u8;
                result[i] = result[i].wrapping_add(correction);
            }
        }

        // Patch the verbatim outlier windows back in, unpacking 7 bytes per
        // word to mirror `detect_semantic_outliers`.
        for outlier in &projection.outliers {
            if outlier.position + outlier.length <= result.len() {
                let mut decoded = Vec::new();
                for word in &outlier.encoded_pattern {
                    let value = word.decode();
                    for j in 0..7 {
                        decoded.push(((value >> (j * 8)) & 0xFF) as u8);
                    }
                }

                for (j, &byte) in decoded.iter().enumerate().take(outlier.length) {
                    if outlier.position + j < result.len() {
                        result[outlier.position + j] = byte;
                    }
                }
            }
        }

        result.truncate(expected_size);
        result
    }
}

impl SparseVec {
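    /// Derives a deterministic ternary hypervector from a 32-byte seed: a
    /// seeded `StdRng` shuffles the index range, the first 1% of indices
    /// become +1 components and the next 1% become -1 components.
    ///
    /// Determinism sketch (assumed paths as above):
    ///
    /// ```ignore
    /// let seed = [7u8; 32];
    /// let a = SparseVec::from_seed(&seed, 10_000);
    /// let b = SparseVec::from_seed(&seed, 10_000);
    /// assert_eq!(a.pos, b.pos);
    /// assert_eq!(a.neg, b.neg);
    /// ```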
    pub fn from_seed(seed: &[u8; 32], dim: usize) -> Self {
        use rand::seq::SliceRandom;
        use rand::SeedableRng;

        let mut rng = rand::rngs::StdRng::from_seed(*seed);
        // 1% positive and 1% negative components.
        let sparsity = dim / 100;
        let mut indices: Vec<usize> = (0..dim).collect();
        indices.shuffle(&mut rng);

        let mut pos: Vec<_> = indices[..sparsity].to_vec();
        let mut neg: Vec<_> = indices[sparsity..sparsity * 2].to_vec();

        pos.sort_unstable();
        neg.sort_unstable();

        SparseVec { pos, neg }
    }

    /// Hashes `data` with SHA-256 and derives a deterministic hypervector
    /// from the digest.
    pub fn from_bytes(data: &[u8]) -> Self {
        use sha2::{Digest, Sha256};

        let mut hasher = Sha256::new();
        hasher.update(data);
        let hash = hasher.finalize();
        let seed: [u8; 32] = hash.into();

        Self::from_seed(&seed, DIM)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_balanced_ternary_roundtrip() {
        let test_values = [
            0i64,
            1,
            -1,
            100,
            -100,
            12345,
            -12345,
            BalancedTernaryWord::MAX_VALUE / 2,
            BalancedTernaryWord::MIN_VALUE / 2,
        ];

        for &value in &test_values {
            let word = BalancedTernaryWord::new(value, WordMetadata::Data)
                .expect("Test value should be encodable");
            let decoded = word.decode();
            assert_eq!(value, decoded, "Failed roundtrip for {}", value);
        }
    }

    #[test]
    fn test_balanced_ternary_metadata() {
        let word = BalancedTernaryWord::new(42, WordMetadata::SemanticOutlier)
            .expect("42 should be encodable");
        assert_eq!(word.metadata(), WordMetadata::SemanticOutlier);
        assert_eq!(word.decode(), 42);
    }

    #[test]
    fn test_balanced_ternary_range() {
        assert!(
            BalancedTernaryWord::new(BalancedTernaryWord::MAX_VALUE, WordMetadata::Data).is_ok()
        );
        assert!(
            BalancedTernaryWord::new(BalancedTernaryWord::MIN_VALUE, WordMetadata::Data).is_ok()
        );

        assert!(
            BalancedTernaryWord::new(BalancedTernaryWord::MAX_VALUE + 1, WordMetadata::Data)
                .is_err()
        );
        assert!(
            BalancedTernaryWord::new(BalancedTernaryWord::MIN_VALUE - 1, WordMetadata::Data)
                .is_err()
        );
    }

    #[test]
    fn test_codebook_projection() {
        let mut codebook = Codebook::new(10000);
        codebook.initialize_standard_basis();

        let data = b"the quick brown fox jumps over the lazy dog";
        let projection = codebook.project(data);

        assert!(projection.quality_score > 0.0);
        assert!(!projection.coefficients.is_empty() || !projection.residual.is_empty());
    }

    #[test]
    fn test_parity_computation() {
        let word =
            BalancedTernaryWord::new(12345, WordMetadata::Data).expect("12345 should be encodable");
        let parity = word.compute_parity();
        assert!((-1..=1).contains(&parity));
    }
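
    // Additional sanity checks added alongside the originals; both exercise
    // behavior that is fully determined by the code above.
    #[test]
    fn test_negate_trits_negates_value() {
        for &value in &[0i64, 1, -1, 42, -12345] {
            let encoded = BalancedTernaryWord::encode_balanced_ternary(value);
            let negated = BalancedTernaryWord::negate_trits(encoded);
            assert_eq!(
                BalancedTernaryWord::decode_balanced_ternary(negated),
                -value
            );
        }
    }

    #[test]
    fn test_from_seed_is_deterministic() {
        let seed = [7u8; 32];
        let a = SparseVec::from_seed(&seed, 1000);
        let b = SparseVec::from_seed(&seed, 1000);
        assert_eq!(a.pos, b.pos);
        assert_eq!(a.neg, b.neg);
    }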
}