1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
#[cfg(test)]
// Every test in this module is skipped on Windows (file locking prevents tempfile
// cleanup). Gating the module once, rather than each test, also keeps the imports and
// the shared helper below from being reported as unused on that platform.
#[cfg(not(target_os = "windows"))]
mod tests {
    use crate::{Memvid, PutOptions, SearchRequest, run_serial_test};
    use std::sync::Mutex;
    use tempfile::NamedTempFile;

    /// Builds the `SearchRequest` shared by every test in this module.
    ///
    /// Only the query text varies between tests; all other fields are fixed:
    /// `top_k` 10, `snippet_chars` 200, no URI/scope/cursor/time filters,
    /// `no_sketch` false, no ACL context, and ACL enforcement in `Audit` mode.
    ///
    /// `query` is `&'static str` so that `.into()` is type-checked exactly as it was
    /// when each test wrote the string literal inline.
    fn lex_search_request(query: &'static str) -> SearchRequest {
        SearchRequest {
            query: query.into(),
            top_k: 10,
            snippet_chars: 200,
            uri: None,
            scope: None,
            cursor: None,
            // This field only exists when the `temporal_track` feature is enabled.
            #[cfg(feature = "temporal_track")]
            temporal: None,
            as_of_frame: None,
            as_of_ts: None,
            no_sketch: false,
            acl_context: None,
            acl_enforcement_mode: crate::types::AclEnforcementMode::Audit,
        }
    }

    /// Ingests 1000 documents with lex enabled, then reopens the file read-only and
    /// checks that the lex flags, the Tantivy segment list and search all still work.
    #[test]
    fn test_lex_persists_and_search_works() {
        run_serial_test(|| {
            let temp = NamedTempFile::new().unwrap();
            let path = temp.path();

            // Phase 1: create, enable lex, ingest docs with periodic commits
            {
                let mut mem = Memvid::create(path).unwrap();
                mem.enable_lex().unwrap();

                for i in 0..1000 {
                    let content = format!(
                        "Document {i} with searchable content about technology and artificial intelligence systems"
                    );
                    let opts = PutOptions::builder()
                        .uri(format!("mv2://doc/{i}"))
                        .search_text(content.clone())
                        .build();
                    mem.put_bytes_with_options(content.as_bytes(), opts)
                        .unwrap();

                    // Commit after every 100th document.
                    if (i + 1) % 100 == 0 {
                        mem.commit().unwrap();
                    }
                }
                mem.commit().unwrap();

                // Index is present in TOC
                assert!(
                    mem.toc.segment_catalog.lex_enabled,
                    "lex_enabled should be set in catalog"
                );
                assert!(
                    !mem.toc.segment_catalog.tantivy_segments.is_empty(),
                    "tantivy_segments should not be empty"
                );
            }

            // Phase 2: reopen RO and search
            {
                let mut mem = Memvid::open_read_only(path).unwrap();
                assert!(mem.lex_enabled, "lex_enabled should persist after reopen");
                assert!(
                    mem.toc.segment_catalog.lex_enabled,
                    "catalog.lex_enabled should persist after reopen"
                );
                assert!(
                    !mem.toc.segment_catalog.tantivy_segments.is_empty(),
                    "tantivy_segments should persist after reopen"
                );

                let resp = mem
                    .search(lex_search_request("artificial intelligence"))
                    .expect("search must succeed");
                assert!(
                    !resp.hits.is_empty(),
                    "expected some hits for 'artificial intelligence'"
                );

                // Case-insensitive check that the top hit mentions a query term.
                let first_hit = &resp.hits[0];
                let text_lower = first_hit.text.to_lowercase();
                assert!(
                    text_lower.contains("artificial") || text_lower.contains("intelligence"),
                    "first hit should contain search terms, got: {}",
                    first_hit.text
                );
            }
        });
    }

    /// Regression test for GitHub issue #201:
    /// Lexical index not enabled when Memvid is wrapped in a Mutex.
    /// The wrapper pattern acquires the lock, performs an operation, releases
    /// the lock — mimicking the typical tokio::sync::Mutex usage in async code.
    #[test]
    fn test_lex_works_through_mutex_wrapper() {
        run_serial_test(|| {
            let temp = NamedTempFile::new().unwrap();
            let path = temp.path();

            // Wrap Memvid in a Mutex, exactly like an async wrapper would
            let wrapper = Mutex::new(Memvid::create(path).unwrap());

            // Step 1: enable_lex while holding the lock, then release
            {
                let mut mem = wrapper.lock().unwrap();
                mem.enable_lex().unwrap();
            }

            // Step 2: commit while holding the lock (separate acquisition)
            {
                let mut mem = wrapper.lock().unwrap();
                mem.commit().unwrap();
            }

            // Step 3: put data while holding the lock
            {
                let mut mem = wrapper.lock().unwrap();
                let opts = PutOptions::builder()
                    .uri("mv2://test/login".to_string())
                    .search_text(
                        "user clicked login button on the authentication page".to_string(),
                    )
                    .build();
                mem.put_bytes_with_options(b"login event data", opts)
                    .unwrap();
            }

            // Step 4: commit while holding the lock
            {
                let mut mem = wrapper.lock().unwrap();
                mem.commit().unwrap();
            }

            // Step 5: search while holding the lock — this was failing in #201
            {
                let mut mem = wrapper.lock().unwrap();
                let resp = mem
                    .search(lex_search_request("login"))
                    .expect("search must succeed through mutex wrapper");
                assert!(
                    !resp.hits.is_empty(),
                    "Should find the frame with 'login' in the message"
                );
            }

            // Step 6: search_lex uses the legacy LexIndex, which may not be
            // populated when only Tantivy is active. Verify it doesn't panic.
            {
                let mut mem = wrapper.lock().unwrap();
                let _ = mem.search_lex("login", 10);
                // Result may be Ok (if legacy index was built) or Err (if only Tantivy).
                // The important thing is it doesn't panic.
            }
        });
    }

    /// Regression test for #201: enable_lex, put, commit, search — all on one
    /// `Memvid` value in a single scope, with no Mutex wrapper involved.
    #[test]
    fn test_lex_works_single_scope() {
        run_serial_test(|| {
            let temp = NamedTempFile::new().unwrap();
            let path = temp.path();

            let mut mem = Memvid::create(path).unwrap();
            mem.enable_lex().unwrap();

            let opts = PutOptions::builder()
                .uri("mv2://test/login".to_string())
                .search_text(
                    "user clicked login button on the authentication page".to_string(),
                )
                .build();
            mem.put_bytes_with_options(b"login event data", opts)
                .unwrap();
            mem.commit().unwrap();

            let resp = mem
                .search(lex_search_request("login"))
                .expect("search must succeed");
            assert!(
                !resp.hits.is_empty(),
                "Should find the frame with 'login' in the message"
            );
        });
    }
}