use std::fs;
use serde::Serialize;
/// Minimal stand-in for an on-disk index file.
///
/// `opening_against_stale_schema_index_wipes_cache` serialises one of these
/// with `rmp_serde::to_vec_named` and writes it to `.basemind/index.msgpack`
/// before opening the store. Field names matter because the named encoding
/// writes them into the payload.
#[derive(Serialize)]
struct LegacyIndex {
/// Schema version tag; the test sets this to 99.
schema_ver: u16,
/// Per-file records keyed by a string (the test uses "a.rs", presumably a
/// path relative to the root — confirm against the real index type).
files: std::collections::BTreeMap<String, LegacyEntry>,
}
/// One per-file record stored in the `files` map of `LegacyIndex`.
///
/// Only ever serialised here; nothing in this file reads it back.
#[derive(Serialize)]
struct LegacyEntry {
/// Hash rendered as a hex string; the test supplies 64 hex characters.
hash_hex: String,
/// Language label for the file (the test uses "rust").
language: String,
/// File size in bytes, going by the field name.
size_bytes: u64,
/// Modification time; unit and epoch are not visible here — TODO confirm.
mtime: i64,
}
/// Backward-compatibility check for document blobs that lack the `keywords`,
/// `entities` and `summary` fields.
///
/// A local struct without those three fields is encoded with
/// `rmp_serde::to_vec_named` and decoded as the current `FileMapDoc`. Decoding
/// must succeed, and the missing fields must come back empty / `None`.
#[cfg(feature = "documents")]
#[test]
fn pre_iter6_doc_blob_deserialises_into_new_filemap_doc() {
    use basemind::extract::doc::FileMapDoc;
    use serde::Serialize;

    // Stand-ins for the older document shape. The field names are written
    // into the named MessagePack encoding, so they must not be altered.
    #[derive(Serialize)]
    struct PreIter6Doc {
        schema_ver: u16,
        mime_type: String,
        content: String,
        metadata: Vec<(String, String)>,
        detected_languages: Vec<String>,
        chunks: Vec<PreIter6Chunk>,
        embedding_model: String,
        embedding_dim: u16,
    }

    #[derive(Serialize)]
    struct PreIter6Chunk {
        byte_start: u32,
        byte_end: u32,
        text: String,
        embedding: Vec<f32>,
    }

    const BODY: &str = "hello world";

    // A single chunk spanning the whole eleven-byte body, with no embedding.
    let only_chunk = PreIter6Chunk {
        byte_start: 0,
        byte_end: 11,
        text: BODY.to_owned(),
        embedding: Vec::new(),
    };
    let legacy = PreIter6Doc {
        schema_ver: 0,
        mime_type: String::from("text/plain"),
        content: BODY.to_owned(),
        metadata: vec![(String::from("title"), String::from("Test"))],
        detected_languages: vec![String::from("eng")],
        chunks: vec![only_chunk],
        embedding_model: String::new(),
        embedding_dim: 0,
    };

    // Round-trip: encode the old shape, decode it as the current type.
    let encoded = rmp_serde::to_vec_named(&legacy).expect("serialize old shape");
    let decoded: FileMapDoc = rmp_serde::from_slice(&encoded)
        .expect("old shape must deserialise via serde(default)");

    // Fields present in the old shape survive the round-trip.
    assert_eq!(decoded.mime_type, "text/plain");
    assert_eq!(decoded.chunks.len(), 1);

    // Fields absent from the old shape fall back to their empty values.
    assert!(
        decoded.keywords.is_empty(),
        "iter-6 `keywords` must default to empty on pre-iter-6 blobs"
    );
    assert!(
        decoded.entities.is_empty(),
        "iter-6 `entities` must default to empty on pre-iter-6 blobs"
    );
    assert!(
        decoded.summary.is_none(),
        "iter-7 `summary` must default to None on pre-iter-6 blobs"
    );
}
/// Backward-compatibility check for document blobs that already carry
/// `keywords` and `entities` but have no `summary` field.
///
/// The local `PreIter7` shape is encoded with `rmp_serde::to_vec_named` and
/// decoded as the current `FileMapDoc`. The one keyword and one entity must be
/// preserved, and `summary` must come back as `None`.
#[cfg(feature = "documents")]
#[test]
fn pre_iter7_doc_blob_deserialises_into_new_filemap_doc() {
    use basemind::extract::doc::FileMapDoc;
    use serde::Serialize;

    // Stand-ins for the older document shape. The field names are written
    // into the named MessagePack encoding, so they must not be altered.
    #[derive(Serialize)]
    struct PreIter7 {
        schema_ver: u16,
        mime_type: String,
        content: String,
        metadata: Vec<(String, String)>,
        detected_languages: Vec<String>,
        chunks: Vec<PreIter7Chunk>,
        embedding_model: String,
        embedding_dim: u16,
        keywords: Vec<PreIter7Keyword>,
        entities: Vec<PreIter7Entity>,
    }

    #[derive(Serialize)]
    struct PreIter7Chunk {
        byte_start: u32,
        byte_end: u32,
        text: String,
        embedding: Vec<f32>,
    }

    #[derive(Serialize)]
    struct PreIter7Keyword {
        text: String,
        score: f32,
        algorithm: String,
    }

    #[derive(Serialize)]
    struct PreIter7Entity {
        category: String,
        text: String,
        start: u32,
        end: u32,
    }

    const BODY: &str = "hello world";

    // Build each nested piece separately so the top-level literal stays flat.
    let only_chunk = PreIter7Chunk {
        byte_start: 0,
        byte_end: 11,
        text: BODY.to_owned(),
        embedding: Vec::new(),
    };
    let keyword = PreIter7Keyword {
        text: String::from("hello"),
        score: 0.5,
        algorithm: String::from("yake"),
    };
    // Offsets 6..11 cover "world" within the body.
    let entity = PreIter7Entity {
        category: String::from("location"),
        text: String::from("world"),
        start: 6,
        end: 11,
    };
    let legacy = PreIter7 {
        schema_ver: 0,
        mime_type: String::from("text/plain"),
        content: BODY.to_owned(),
        metadata: Vec::new(),
        detected_languages: vec![String::from("eng")],
        chunks: vec![only_chunk],
        embedding_model: String::new(),
        embedding_dim: 0,
        keywords: vec![keyword],
        entities: vec![entity],
    };

    // Round-trip: encode the old shape, decode it as the current type.
    let encoded = rmp_serde::to_vec_named(&legacy).expect("serialize pre-iter-7 shape");
    let decoded: FileMapDoc = rmp_serde::from_slice(&encoded)
        .expect("pre-iter-7 shape must deserialise via serde(default)");

    assert_eq!(decoded.keywords.len(), 1, "iter-6 keywords preserved");
    assert_eq!(decoded.entities.len(), 1, "iter-6 entities preserved");
    assert!(
        decoded.summary.is_none(),
        "iter-7 `summary` must default to None on pre-iter-7 blobs"
    );
}
/// Opening a store over a cache whose index declares `schema_ver: 99` must
/// succeed and leave an empty cache behind.
///
/// The test plants a junk blob under `.basemind/blobs` and a `LegacyIndex`
/// at `.basemind/index.msgpack`, opens the store, then checks that the
/// in-memory index is empty, the blob is gone, and the blobs directory
/// itself still exists.
#[test]
fn opening_against_stale_schema_index_wipes_cache() {
    // The temp dir guard stays bound so the directory lives until the end.
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    let basemind_dir = root.join(".basemind");
    let blobs_dir = basemind_dir.join("blobs");
    fs::create_dir_all(&blobs_dir).unwrap();

    // A file that merely looks like a cached blob; its contents are junk.
    let blob_path = blobs_dir.join("deadbeef.l1.msgpack");
    fs::write(&blob_path, b"not really a blob").unwrap();

    // Index with a single entry and a schema version of 99 (presumably one
    // the current code does not accept — the test name calls it stale).
    let stale_entry = LegacyEntry {
        hash_hex: "deadbeef".repeat(8),
        language: "rust".to_string(),
        size_bytes: 42,
        mtime: 0,
    };
    let stale_index = LegacyIndex {
        schema_ver: 99,
        files: std::collections::BTreeMap::from([("a.rs".to_string(), stale_entry)]),
    };
    let encoded = rmp_serde::to_vec_named(&stale_index).unwrap();
    fs::write(basemind_dir.join("index.msgpack"), encoded).unwrap();

    let store = basemind::store::Store::open(root, basemind::store::VIEW_WORKING)
        .expect("open should succeed via auto-wipe");

    assert!(
        store.index.files.is_empty(),
        "in-memory index should be empty after wipe"
    );
    assert!(!blob_path.exists(), "stale blob should have been removed");
    assert!(blobs_dir.exists());
}