1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
//! # Interstellar Full-Text Search Quickstart
//!
//! An introduction to Interstellar's Tantivy-backed full-text search.
//!
//! This example demonstrates:
//! - Registering a text index on a vertex property (spec-55)
//! - Indexing documents (tokens are extracted on insert / update / remove)
//! - Running BM25-ranked free-text queries via the Rust fluent API
//! - Running a structured phrase query via `TextQuery`
//! - Chaining `search_text` with regular Gremlin steps
//! - Edge-side text search (spec-55b) via `search_text_e`
//! - Querying the same engine from Gremlin (`g.searchTextV` /
//!   `g.searchTextE` / `textScore()`) and GQL (`CALL interstellar.searchText*`
//!   with `YIELD`), per spec-55c
//!
//! Run: `cargo run --example quickstart_text_search --features full-text`
use std::collections::HashMap;
use std::sync::Arc;
use interstellar::gremlin::ExecutionResult;
use interstellar::storage::text::{ElementRef, TextIndexConfig, TextQuery};
use interstellar::storage::Graph;
use interstellar::value::Value;
fn main() {
println!("=== Interstellar Full-Text Search Quickstart ===\n");
// -------------------------------------------------------------------------
// 1. Create an in-memory graph and a Gremlin traversal source
// -------------------------------------------------------------------------
let graph = Arc::new(Graph::new());
let g = graph.gremlin(Arc::clone(&graph));
// -------------------------------------------------------------------------
// 2. Register a text index on the `body` property
//
// The default `TextIndexConfig` uses a standard analyzer (lowercase +
// unicode tokenizer) and stores positions so phrase queries work.
//
// Indexes can be created BEFORE or AFTER vertices exist. If created
// afterwards, the existing string values are back-filled automatically.
// -------------------------------------------------------------------------
graph
.create_text_index_v("body", TextIndexConfig::default())
.expect("text index creation failed");
println!("Registered text index on `body`");
// -------------------------------------------------------------------------
// 3. Insert some documents
//
// The mutation hooks in `Graph::add_vertex` automatically upsert the
// `body` field into the text index.
// -------------------------------------------------------------------------
let _intro = g
.add_v("article")
.property("title", "Intro to Raft")
.property("body", "raft is a consensus algorithm for replicated logs")
.next()
.unwrap();
let _paxos = g
.add_v("article")
.property("title", "Paxos Made Simple")
.property("body", "paxos is the classical consensus protocol")
.next()
.unwrap();
let _gossip = g
.add_v("article")
.property("title", "Epidemic Broadcast")
.property(
"body",
"gossip protocols disseminate state across a cluster",
)
.next()
.unwrap();
let _note = g
.add_v("note")
.property("body", "raft and paxos are both consensus protocols")
.next()
.unwrap();
println!("Indexed 4 documents (3 articles, 1 note)\n");
// -------------------------------------------------------------------------
// 4. Free-text search: top-k by BM25 relevance
//
// `search_text(property, query, k)` returns a traversal seeded with the
// top-k matching vertices, sorted by descending BM25 score.
// -------------------------------------------------------------------------
println!("-- Free-text search for \"consensus\" (top 5) --\n");
let hits = g
.search_text("body", "consensus", 5)
.expect("search failed")
.values("title")
.to_value_list();
for (rank, title) in hits.iter().enumerate() {
if let Value::String(s) = title {
println!(" {}. {}", rank + 1, s);
}
}
// -------------------------------------------------------------------------
// 5. Chain search results with regular Gremlin steps
//
// `search_text` is just another source step; it composes with `has_label`,
// `out`, `where_`, etc. Here we restrict to articles only.
// -------------------------------------------------------------------------
println!("\n-- \"consensus\" restricted to label=article --\n");
let titles = g
.search_text("body", "consensus", 10)
.expect("search failed")
.has_label("article")
.values("title")
.to_value_list();
for title in &titles {
if let Value::String(s) = title {
println!(" - {}", s);
}
}
// -------------------------------------------------------------------------
// 6. Structured queries via `TextQuery`
//
// Use `search_text_query` for phrase, prefix, boolean, and fuzzy queries.
// -------------------------------------------------------------------------
println!("\n-- Phrase query: \"replicated logs\" --\n");
let phrase = TextQuery::Phrase {
text: "replicated logs".to_string(),
slop: 0,
};
let phrase_hits = g
.search_text_query("body", &phrase, 10)
.expect("search failed")
.values("title")
.to_value_list();
for title in &phrase_hits {
if let Value::String(s) = title {
println!(" - {}", s);
}
}
// -------------------------------------------------------------------------
// 7. Updates flow through the index automatically
//
// `set_vertex_property` replaces the indexed text. Tokens that no longer
// appear in the new value stop matching; new tokens become searchable.
// -------------------------------------------------------------------------
println!("\n-- Updating a document --\n");
let gossip_id = g
.v()
.has_value("title", Value::from("Epidemic Broadcast"))
.next()
.unwrap()
.id();
graph
.set_vertex_property(
gossip_id,
"body",
Value::String("gossip implements eventual consistency for state".into()),
)
.unwrap();
let consistency = g
.search_text("body", "consistency", 5)
.expect("search failed")
.values("title")
.to_value_list();
println!("After update, search for \"consistency\":");
for title in &consistency {
if let Value::String(s) = title {
println!(" - {}", s);
}
}
// -------------------------------------------------------------------------
// 8. Full-text search on EDGES
//
// Edges with text payloads (comments, endorsements, annotations) work
// exactly like vertices. Use the `_e` suffix throughout:
// - `create_text_index_e` to register
// - `search_text_e` / `search_text_query_e` as source steps
// - `text_index_e` for direct programmatic access
//
// Property names are GLOBALLY unique across vertex and edge indexes, so
// we register `note` (not `body`, which is taken by the vertex index).
// -------------------------------------------------------------------------
println!("\n-- Edge full-text search --\n");
graph
.create_text_index_e("note", TextIndexConfig::default())
.expect("edge text index creation failed");
// Two anchor vertices to attach edges to.
let alice = g
.add_v("user")
.property("name", "Alice")
.next()
.unwrap()
.id();
let bob = g.add_v("user").property("name", "Bob").next().unwrap().id();
// Several `endorses` edges with body text.
let mk_props = |body: &str| {
let mut p = HashMap::new();
p.insert("note".to_string(), Value::String(body.to_string()));
p
};
graph
.add_edge(
alice,
bob,
"endorses",
mk_props("raft is the cleanest consensus algorithm"),
)
.unwrap();
graph
.add_edge(
alice,
bob,
"endorses",
mk_props("paxos is harder to implement than raft"),
)
.unwrap();
graph
.add_edge(
bob,
alice,
"endorses",
mk_props("gossip protocols scale beautifully"),
)
.unwrap();
// Free-text edge search.
println!("Free-text edge search for \"raft\":");
let raft_edges = g
.search_text_e("note", "raft", 10)
.expect("edge search failed")
.values("note")
.to_value_list();
for (rank, note) in raft_edges.iter().enumerate() {
if let Value::String(s) = note {
println!(" {}. {}", rank + 1, s);
}
}
// Phrase query on edges.
println!("\nPhrase query on edges: \"consensus algorithm\"");
let phrase_q = TextQuery::Phrase {
text: "consensus algorithm".to_string(),
slop: 0,
};
let phrase_edges = g
.search_text_query_e("note", &phrase_q, 10)
.expect("edge phrase search failed")
.values("note")
.to_value_list();
for note in &phrase_edges {
if let Value::String(s) = note {
println!(" - {}", s);
}
}
// Programmatic access: inspect raw scores via `text_index_e`.
//
// The handle returned by `text_index_e` exposes `search`, which yields
// `TextHit { element, score }`. For edge indexes, `element` is always
// `ElementRef::Edge(EdgeId)`.
println!("\nRaw BM25 scores for \"raft\" on edges:");
let edge_index = graph.text_index_e("note").expect("edge index missing");
let raw_hits = edge_index
.search(&TextQuery::Match("raft".to_string()), 10)
.expect("raw edge search failed");
for hit in &raw_hits {
if let ElementRef::Edge(eid) = hit.element {
println!(" edge {:?}: score = {:.4}", eid, hit.score);
}
}
// -------------------------------------------------------------------------
// 6. Query-language surfaces (spec-55c): Gremlin and GQL
//
// The same FTS engine is reachable from both query languages. The Rust
// API stays the most expressive (compound And/Or/Not is Gremlin or
// Rust-only), but the language surfaces cover the common cases.
// -------------------------------------------------------------------------
println!("\n=== Query languages ===");
// -- Gremlin: bare-string sugars to TextQ.match --------------------------
println!("\nGremlin g.searchTextV('body', 'raft', 5).values('title'):");
let titles = graph
.execute_script("g.searchTextV('body', 'raft', 5).values('title')")
.expect("gremlin failed");
if let ExecutionResult::List(values) = &titles.result {
for v in values {
if let Value::String(s) = v {
println!(" - {s}");
}
}
}
// -- Gremlin: structured TextQ.phrase + textScore() ----------------------
println!("\nGremlin g.searchTextV(... TextQ.phrase('consensus algorithm') ...).textScore():");
let scores = graph
.execute_script("g.searchTextV('body', TextQ.phrase('consensus algorithm'), 5).textScore()")
.expect("gremlin failed");
if let ExecutionResult::List(values) = &scores.result {
for (rank, v) in values.iter().enumerate() {
if let Value::Float(s) = v {
println!(" {}. score = {:.4}", rank + 1, s);
}
}
}
// -- GQL: CALL procedure with YIELD elemId, score ------------------------
println!("\nGQL CALL interstellar.searchTextV('body', 'raft', 5):");
let rows = graph
.gql(
"MATCH (anchor) WHERE id(anchor) = 0 \
CALL interstellar.searchTextV('body', 'raft', 5) \
YIELD elemId, score RETURN elemId, score",
)
.expect("gql failed");
for row in &rows {
if let Value::Map(m) = row {
let id = m.get("elemId");
let score = m.get("score");
println!(" elemId={id:?}, score={score:?}");
}
}
// -- GQL: edge-side prefix expansion -------------------------------------
println!("\nGQL CALL interstellar.searchTextPrefixE('note', 'consen', 5):");
let edge_rows = graph
.gql(
"MATCH (anchor) WHERE id(anchor) = 0 \
CALL interstellar.searchTextPrefixE('note', 'consen', 5) \
YIELD elemId, score RETURN elemId, score",
)
.expect("gql failed");
for row in &edge_rows {
if let Value::Map(m) = row {
println!(" {:?}", m);
}
}
println!("\n=== Quickstart Complete ===");
}