agentic_memory/index/
doc_lengths.rs1use crate::engine::tokenizer::Tokenizer;
4use crate::graph::MemoryGraph;
5use crate::types::CognitiveEvent;
6
/// Table of per-node document lengths, measured in tokens.
///
/// Lengths are stored densely: the length for a node lives at the index equal
/// to the node's id. A stored length of `0` means "no document recorded" —
/// the counting and averaging methods skip zero entries.
#[derive(Debug, Clone)]
pub struct DocLengths {
    /// Token count per node, indexed by node id; `0` marks an unused slot.
    lengths: Vec<u32>,
}
13
14impl DocLengths {
15 pub fn new() -> Self {
17 Self {
18 lengths: Vec::new(),
19 }
20 }
21
22 pub fn build(graph: &MemoryGraph, tokenizer: &Tokenizer) -> Self {
24 let mut lengths = Vec::new();
25
26 for node in graph.nodes() {
27 let id = node.id as usize;
28 if id >= lengths.len() {
29 lengths.resize(id + 1, 0);
30 }
31 lengths[id] = tokenizer.tokenize(&node.content).len() as u32;
32 }
33
34 Self { lengths }
35 }
36
37 pub fn get(&self, node_id: u64) -> u32 {
39 let idx = node_id as usize;
40 if idx < self.lengths.len() {
41 self.lengths[idx]
42 } else {
43 0
44 }
45 }
46
47 pub fn average(&self) -> f32 {
49 let non_zero: Vec<u32> = self.lengths.iter().filter(|&&l| l > 0).copied().collect();
50 if non_zero.is_empty() {
51 0.0
52 } else {
53 non_zero.iter().sum::<u32>() as f32 / non_zero.len() as f32
54 }
55 }
56
57 pub fn len(&self) -> usize {
59 self.lengths.iter().filter(|&&l| l > 0).count()
60 }
61
62 pub fn is_empty(&self) -> bool {
64 self.len() == 0
65 }
66
67 pub fn add_node(&mut self, event: &CognitiveEvent) {
69 let count = Tokenizer::new().tokenize(&event.content).len() as u32;
70 let id = event.id as usize;
71 if id >= self.lengths.len() {
72 self.lengths.resize(id + 1, 0);
73 }
74 self.lengths[id] = count;
75 }
76
77 pub fn remove_node(&mut self, id: u64) {
79 let idx = id as usize;
80 if idx < self.lengths.len() {
81 self.lengths[idx] = 0;
82 }
83 }
84
85 pub fn clear(&mut self) {
87 self.lengths.clear();
88 }
89
90 pub fn rebuild(&mut self, graph: &MemoryGraph) {
92 *self = Self::build(graph, &Tokenizer::new());
93 }
94
95 pub fn to_bytes(&self) -> Vec<u8> {
97 let mut buf: Vec<u8> = Vec::new();
98 buf.extend_from_slice(&(self.lengths.len() as u64).to_le_bytes());
99 for &len in &self.lengths {
100 buf.extend_from_slice(&len.to_le_bytes());
101 }
102 buf
103 }
104
105 pub fn from_bytes(data: &[u8]) -> Option<Self> {
107 if data.len() < 8 {
108 return None;
109 }
110
111 let count = u64::from_le_bytes(data[0..8].try_into().ok()?) as usize;
112 let expected_size = 8 + count * 4;
113 if data.len() < expected_size {
114 return None;
115 }
116
117 let mut lengths = Vec::with_capacity(count);
118 for i in 0..count {
119 let offset = 8 + i * 4;
120 let len = u32::from_le_bytes(data[offset..offset + 4].try_into().ok()?);
121 lengths.push(len);
122 }
123
124 Some(Self { lengths })
125 }
126}
127
128impl Default for DocLengths {
129 fn default() -> Self {
130 Self::new()
131 }
132}