chaotic_semantic_memory 0.3.2

AI memory systems with hyperdimensional vectors and chaotic reservoirs
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
use crate::error::{MemoryError, Result};
use crate::export_payload::{BinaryExportPayload, ExportPayload, unix_now_secs};
use crate::framework::ChaoticSemanticFramework;
use crate::framework_events::MemoryEvent;
use crate::hyperdim::HVec10240;
use crate::singularity::ConceptBuilder;
use bincode::Options;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::fs;
use tracing::{instrument, warn};

const MAX_IMPORT_SIZE: u64 = 100 * 1024 * 1024; // 100 MB default
const MAX_PATH_LENGTH: usize = 4096; // byte length cap, matches common PATH_MAX

/// Validate and sanitize a user-supplied filesystem path.
///
/// Rejects paths that are too long, contain `..` traversal components, or
/// (when absolute) resolve outside the current working directory and `/tmp`.
/// Relative paths are returned as-is after the traversal check.
///
/// # Errors
///
/// Returns [`MemoryError::InvalidInput`] describing the first violated rule.
fn validate_path(path: &str) -> Result<PathBuf> {
    if path.len() > MAX_PATH_LENGTH {
        return Err(MemoryError::InvalidInput {
            field: "path".to_string(),
            reason: format!(
                "path exceeds maximum length of {} characters",
                MAX_PATH_LENGTH
            ),
        });
    }

    let path = PathBuf::from(path);

    // Refuse `..` anywhere in the path, before any normalization happens.
    if path
        .components()
        .any(|c| c == std::path::Component::ParentDir)
    {
        return Err(MemoryError::InvalidInput {
            field: "path".to_string(),
            reason: "path traversal '..' components are not allowed".to_string(),
        });
    }

    if path.is_absolute() {
        // Canonicalize to resolve symlinks before the containment check.
        // The full path may not exist yet (e.g. an export destination), so
        // fall back to canonicalizing the parent directory and re-appending
        // the final component; previously a not-yet-created absolute target
        // was always rejected, which broke export to absolute paths.
        let normalized = match path.canonicalize() {
            Ok(p) => p,
            Err(_) => {
                let fallback = path
                    .parent()
                    .zip(path.file_name())
                    .and_then(|(dir, name)| dir.canonicalize().ok().map(|d| d.join(name)));
                match fallback {
                    Some(p) => p,
                    None => {
                        return Err(MemoryError::InvalidInput {
                            field: "path".to_string(),
                            reason: "absolute path does not exist or cannot be accessed"
                                .to_string(),
                        });
                    }
                }
            }
        };

        let current_dir = std::env::current_dir().map_err(|e| MemoryError::InvalidInput {
            field: "path".to_string(),
            reason: format!("cannot determine current working directory: {}", e),
        })?;

        // `Path::starts_with` compares whole components, so "/tmpfoo" does
        // NOT match the "/tmp" prefix.
        if !normalized.starts_with(&current_dir) && !normalized.starts_with("/tmp") {
            return Err(MemoryError::InvalidInput {
                field: "path".to_string(),
                reason: "absolute paths must be within current working directory or /tmp"
                    .to_string(),
            });
        }
    }

    Ok(path)
}

impl ChaoticSemanticFramework {
    /// Batch inject multiple concepts into memory.
    ///
    /// Each concept is validated and inserted atomically. If persistence is enabled,
    /// concepts are persisted to the database in a single batch operation.
    #[instrument(err, skip(self, concepts))]
    pub async fn inject_concepts(&self, concepts: &[(String, HVec10240)]) -> Result<()> {
        if concepts.is_empty() {
            return Ok(());
        }

        let mut to_save = Vec::with_capacity(concepts.len());
        {
            let mut sing = self.singularity.write().await;
            for (id, vector) in concepts {
                Self::validate_concept_id(id)?;
                let concept = ConceptBuilder::new(id.clone())
                    .with_vector(*vector)
                    .build()?;
                sing.inject(concept.clone())?;
                to_save.push(concept);
            }
        }

        if let Some(ref persistence) = self.persistence {
            persistence.save_concepts(&to_save).await?;
        }

        self.metrics.inc_concepts_injected(to_save.len() as u64);
        Ok(())
    }

    /// Batch create associations between concepts.
    ///
    /// Each association is validated before insertion. If persistence is enabled,
    /// associations are persisted in a single batch operation.
    #[instrument(err, skip(self, associations))]
    pub async fn associate_many(&self, associations: &[(String, String, f32)]) -> Result<()> {
        if associations.is_empty() {
            return Ok(());
        }

        {
            let mut sing = self.singularity.write().await;
            for (from, to, strength) in associations {
                Self::validate_concept_id(from)?;
                Self::validate_concept_id(to)?;
                Self::validate_association_strength(*strength)?;
                sing.associate(from, to, *strength)?;
            }
        }

        if let Some(ref persistence) = self.persistence {
            persistence.save_associations(associations).await?;
        }

        self.metrics
            .inc_associations_created(associations.len() as u64);
        Ok(())
    }

    /// Batch similarity queries without caching.
    ///
    /// Returns similarity results for each query vector. Results are not cached.
    #[instrument(err, skip(self, queries))]
    pub async fn probe_batch(
        &self,
        queries: &[HVec10240],
        top_k: usize,
    ) -> Result<Vec<Vec<(String, f32)>>> {
        self.validate_top_k(top_k)?;
        let sing = self.singularity.read().await;
        let mut out = Vec::with_capacity(queries.len());
        for query in queries {
            out.push(sing.find_similar(query, top_k));
        }
        Ok(out)
    }

    /// Batch similarity queries with LRU caching.
    ///
    /// Results are cached and reused for identical queries. Returns Arc references
    /// to avoid cloning large result sets.
    #[allow(clippy::type_complexity)]
    #[instrument(err, skip(self, queries))]
    pub async fn probe_batch_cached(
        &self,
        queries: &[HVec10240],
        top_k: usize,
    ) -> Result<Vec<Arc<[(String, f32)]>>> {
        self.validate_top_k(top_k)?;
        let sing = self.singularity.read().await;
        let mut out = Vec::with_capacity(queries.len());
        for query in queries {
            out.push(sing.find_similar_cached(query, top_k));
        }
        Ok(out)
    }

    /// Export memory state to JSON file.
    ///
    /// Writes all concepts and associations to the specified path in JSON format.
    /// Useful for backups, debugging, and interoperability.
    #[instrument(err, skip(self), fields(path))]
    pub async fn export_json(&self, path: &str) -> Result<()> {
        let validated_path = validate_path(path)?;

        let payload = {
            let sing = self.singularity.read().await;
            ExportPayload {
                version: env!("CARGO_PKG_VERSION").to_string(),
                exported_at: unix_now_secs(),
                concepts: sing.all_concepts(),
                associations: sing.all_associations(),
            }
        };
        let data = serde_json::to_vec_pretty(&payload)?;
        fs::write(validated_path, data).await?;
        Ok(())
    }
    /// Import memory state from JSON file.
    ///
    /// If `merge` is false, clears existing state before importing.
    /// Returns the number of concepts imported.
    #[instrument(err, skip(self), fields(path, merge))]
    pub async fn import_json(&self, path: &str, merge: bool) -> Result<usize> {
        let validated_path = validate_path(path)?;
        let bytes = fs::read(validated_path).await?;
        if bytes.len() > MAX_IMPORT_SIZE as usize {
            return Err(crate::error::MemoryError::InvalidInput {
                field: "import_data".to_string(),
                reason: format!(
                    "JSON import data size {} exceeds maximum allowed size {}",
                    bytes.len(),
                    MAX_IMPORT_SIZE
                ),
            });
        }
        let payload: ExportPayload = serde_json::from_slice(&bytes)?;

        if !merge {
            {
                let mut sing = self.singularity.write().await;
                sing.clear();
            }
            if let Some(ref persistence) = self.persistence {
                persistence.clear_all().await?;
            }
        }

        // Acquire write lock, inject concepts + build associations list, then release
        let valid_associations = {
            let mut sing = self.singularity.write().await;
            let mut associations = Vec::with_capacity(payload.associations.len());
            for concept in &payload.concepts {
                self.validate_concept(concept)?;
                sing.inject(concept.clone())?;
            }
            for (from, to, strength) in &payload.associations {
                match sing.associate(from, to, *strength) {
                    Ok(()) => associations.push((from.clone(), to.clone(), *strength)),
                    Err(error) => {
                        warn!(
                            from_id = %from,
                            to_id = %to,
                            strength = *strength,
                            error = %error,
                            "skipping invalid association during import_json"
                        );
                    }
                }
            }
            associations
        }; // Lock released here
        // Persist concepts and associations (no lock needed)
        if let Some(ref persistence) = self.persistence {
            persistence.save_concepts(&payload.concepts).await?;
            persistence.save_associations(&valid_associations).await?;
        }
        Ok(payload.concepts.len())
    }
    /// Export memory state to binary file.
    ///
    /// Uses bincode for compact serialization. More efficient than JSON for
    /// large datasets.
    #[instrument(err, skip(self), fields(path))]
    pub async fn export_binary(&self, path: &str) -> Result<()> {
        let validated_path = validate_path(path)?;

        let payload = {
            let sing = self.singularity.read().await;
            let json_payload = ExportPayload {
                version: env!("CARGO_PKG_VERSION").to_string(),
                exported_at: unix_now_secs(),
                concepts: sing.all_concepts(),
                associations: sing.all_associations(),
            };
            // Convert to binary-compatible format
            BinaryExportPayload::from(json_payload)
        };

        let options = bincode::DefaultOptions::new().with_limit(MAX_IMPORT_SIZE);
        let data = options.serialize(&payload).map_err(|e| {
            crate::error::MemoryError::Serialization(serde_json::Error::io(std::io::Error::other(
                e.to_string(),
            )))
        })?;
        fs::write(validated_path, data).await?;
        Ok(())
    }

    /// Import memory state from binary file.
    ///
    /// If `merge` is false, clears existing state before importing.
    /// Returns the number of concepts imported.
    #[instrument(err, skip(self), fields(path, merge))]
    pub async fn import_binary(&self, path: &str, merge: bool) -> Result<usize> {
        let validated_path = validate_path(path)?;
        let bytes = fs::read(validated_path).await?;

        if bytes.len() > MAX_IMPORT_SIZE as usize {
            return Err(crate::error::MemoryError::InvalidInput {
                field: "import_data".to_string(),
                reason: format!(
                    "import data size {} exceeds maximum allowed size {}",
                    bytes.len(),
                    MAX_IMPORT_SIZE
                ),
            });
        }
        let options = bincode::DefaultOptions::new().with_limit(MAX_IMPORT_SIZE);
        let binary_payload: BinaryExportPayload =
            options
                .deserialize(&bytes)
                .map_err(|e| crate::error::MemoryError::InvalidInput {
                    field: "import_data".to_string(),
                    reason: format!("bincode deserialization failed: {}", e),
                })?;
        // Convert to regular payload
        let payload = binary_payload.to_export_payload().map_err(|e| {
            crate::error::MemoryError::InvalidInput {
                field: "import_data".to_string(),
                reason: format!("failed to convert binary payload: {}", e),
            }
        })?;
        if !merge {
            {
                let mut sing = self.singularity.write().await;
                sing.clear();
            }
            if let Some(ref persistence) = self.persistence {
                persistence.clear_all().await?;
            }
        }
        // Acquire write lock, inject concepts + build associations list, then release
        let valid_associations = {
            let mut sing = self.singularity.write().await;
            let mut associations = Vec::with_capacity(payload.associations.len());
            for concept in &payload.concepts {
                self.validate_concept(concept)?;
                sing.inject(concept.clone())?;
            }
            for (from, to, strength) in &payload.associations {
                match sing.associate(from, to, *strength) {
                    Ok(()) => associations.push((from.clone(), to.clone(), *strength)),
                    Err(error) => {
                        warn!(
                            from_id = %from,
                            to_id = %to,
                            strength = *strength,
                            error = %error,
                            "skipping invalid association during import_binary"
                        );
                    }
                }
            }
            associations
        }; // Lock released here

        // Persist concepts and associations (no lock needed)
        if let Some(ref persistence) = self.persistence {
            persistence.save_concepts(&payload.concepts).await?;
            persistence.save_associations(&valid_associations).await?;
        }

        Ok(payload.concepts.len())
    }

    /// Create database backup (SQLite only).
    ///
    /// Creates a copy of the database file. Only works with local SQLite databases.
    #[instrument(err, skip(self), fields(path))]
    pub async fn backup(&self, path: &str) -> Result<()> {
        let validated_path = validate_path(path)?;
        if let Some(ref persistence) = self.persistence {
            persistence
                .backup(validated_path.to_str().unwrap_or(path))
                .await?;
        }
        Ok(())
    }

    /// Restore from database backup (SQLite only).
    ///
    /// Replaces the current database with the backup and reloads memory state.
    #[instrument(err, skip(self), fields(path))]
    pub async fn restore(&self, path: &str) -> Result<()> {
        let validated_path = validate_path(path)?;
        if let Some(ref persistence) = self.persistence {
            persistence
                .restore(validated_path.to_str().unwrap_or(path))
                .await?;
            self.load_replace().await?;
        }
        Ok(())
    }

    /// Get version history for a concept.
    ///
    /// Returns up to `limit` previous versions of the concept, ordered by
    /// version number descending. Returns empty vec if persistence is disabled.
    #[instrument(err, skip(self), fields(id, limit))]
    pub async fn concept_history(
        &self,
        id: &str,
        limit: usize,
    ) -> Result<Vec<crate::persistence::ConceptVersion>> {
        if let Some(ref persistence) = self.persistence {
            return persistence.get_concept_history(id, limit).await;
        }
        Ok(Vec::new())
    }

    /// Update a concept's vector.
    ///
    /// Updates the vector in memory and persists the change if persistence is enabled.
    /// Records a new version in the version history.
    #[instrument(err, skip(self), fields(id))]
    pub async fn update_concept_vector(&self, id: &str, vector: HVec10240) -> Result<()> {
        let concept = {
            let mut sing = self.singularity.write().await;
            sing.update(id, vector)?;
            sing.get(id).cloned()
        };

        if let (Some(concept), Some(persistence)) = (concept, &self.persistence) {
            persistence.save_concept(&concept).await?;
        }
        self.emit_event(MemoryEvent::ConceptUpdated {
            id: id.to_string(),
            timestamp: unix_now_secs(),
        });
        Ok(())
    }

    /// Update a concept's metadata.
    ///
    /// Updates the metadata in memory and persists the change if persistence is enabled.
    #[instrument(err, skip(self), fields(id))]
    pub async fn update_concept_metadata(
        &self,
        id: &str,
        metadata: std::collections::HashMap<String, serde_json::Value>,
    ) -> Result<()> {
        let concept = {
            let mut sing = self.singularity.write().await;
            sing.update_metadata(id, metadata)?;
            sing.get(id).cloned()
        };

        if let (Some(concept), Some(persistence)) = (concept, &self.persistence) {
            persistence.save_concept(&concept).await?;
        }
        self.emit_event(MemoryEvent::ConceptUpdated {
            id: id.to_string(),
            timestamp: unix_now_secs(),
        });
        Ok(())
    }

    /// Remove an association between two concepts.
    ///
    /// Removes the association from memory and persists the change if persistence is enabled.
    #[instrument(err, skip(self), fields(from, to))]
    pub async fn disassociate(&self, from: &str, to: &str) -> Result<()> {
        {
            let mut sing = self.singularity.write().await;
            sing.disassociate(from, to)?;
        }

        if let Some(persistence) = &self.persistence {
            persistence.delete_association(from, to).await?;
        }
        self.emit_event(MemoryEvent::Disassociated {
            from: from.to_string(),
            to: to.to_string(),
        });
        Ok(())
    }

    /// Clear all outbound associations for a concept.
    ///
    /// Removes all associations from the given concept in memory and persists
    /// the change if persistence is enabled.
    #[instrument(err, skip(self), fields(id))]
    pub async fn clear_associations(&self, id: &str) -> Result<()> {
        {
            let mut sing = self.singularity.write().await;
            sing.clear_associations(id)?;
        }

        if let Some(persistence) = &self.persistence {
            persistence.clear_concept_associations(id).await?;
        }
        Ok(())
    }

    /// Clear the similarity query cache.
    ///
    /// Useful when you want to ensure fresh similarity results.
    pub async fn clear_similarity_cache(&self) {
        let sing = self.singularity.read().await;
        sing.clear_similarity_cache();
    }

    /// Bundle multiple concepts into a single hypervector (strict version).
    ///
    /// Returns `NotFound` error if any concept ID is missing.
    pub async fn bundle_concepts_strict(&self, ids: &[String]) -> Result<HVec10240> {
        let sing = self.singularity.read().await;
        sing.bundle_concepts_strict(ids)
    }
}