velesdb-server 1.4.4

REST API server for VelesDB vector database
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
//! Request/Response types for VelesDB REST API.
//!
//! This module contains all the data transfer objects used by the API handlers.

use serde::{Deserialize, Serialize};
use utoipa::ToSchema;

/// Canonical `VelesQL` contract version for REST responses.
// NOTE(review): presumably the value emitted in
// `QueryResponseMeta::velesql_contract_version` — confirm in the handlers.
pub const VELESQL_CONTRACT_VERSION: &str = "2.1.0";

// ============================================================================
// Collection Types
// ============================================================================

/// Request to create a new collection.
///
/// Only `name` must be present in the body: `dimension` is optional and
/// `metric`, `storage_mode` and `collection_type` fall back to defaults.
// NOTE(review): the string-valued options are not validated by this type;
// presumably the handler rejects unknown values — confirm.
#[derive(Debug, Deserialize, ToSchema)]
pub struct CreateCollectionRequest {
    /// Collection name.
    #[schema(example = "documents")]
    pub name: String,
    /// Vector dimension (required for vector collections, ignored for `metadata_only`).
    #[schema(example = 768)]
    pub dimension: Option<usize>,
    /// Distance metric (cosine, euclidean, dot, hamming, jaccard). Defaults to cosine.
    #[serde(default = "default_metric")]
    #[schema(example = "cosine")]
    pub metric: String,
    /// Storage mode (full, sq8, binary). Defaults to full.
    #[serde(default = "default_storage_mode")]
    #[schema(example = "full")]
    pub storage_mode: String,
    /// Collection type: "vector" (default) or "metadata_only".
    #[serde(default = "default_collection_type")]
    #[schema(example = "vector")]
    pub collection_type: String,
}

/// Serde fallback for `CreateCollectionRequest::collection_type` when the
/// field is absent from the request body.
fn default_collection_type() -> String {
    String::from("vector")
}

/// Serde fallback for `CreateCollectionRequest::metric` when the field is
/// absent from the request body.
fn default_metric() -> String {
    String::from("cosine")
}

/// Serde fallback for `CreateCollectionRequest::storage_mode` when the field
/// is absent from the request body.
fn default_storage_mode() -> String {
    String::from("full")
}

/// Response describing a single collection.
#[derive(Debug, Serialize, ToSchema)]
pub struct CollectionResponse {
    /// Collection name.
    pub name: String,
    /// Vector dimension.
    // NOTE(review): not optional here although the create request allows
    // omitting it; the value reported for metadata-only collections is not
    // visible from this module — confirm.
    pub dimension: usize,
    /// Distance metric.
    pub metric: String,
    /// Number of points in the collection.
    pub point_count: usize,
    /// Storage mode (full, sq8, binary).
    pub storage_mode: String,
}

// ============================================================================
// Point Types
// ============================================================================

/// Request to upsert a batch of points.
#[derive(Debug, Deserialize, ToSchema)]
pub struct UpsertPointsRequest {
    /// Points to upsert.
    pub points: Vec<PointRequest>,
}

/// A single point inside an upsert request.
#[derive(Debug, Deserialize, ToSchema)]
pub struct PointRequest {
    /// Point ID.
    pub id: u64,
    /// Vector data.
    // NOTE(review): presumably must match the collection dimension; the
    // length is not checked by this type — confirm in the handler.
    pub vector: Vec<f32>,
    /// Optional payload (arbitrary JSON); may be omitted.
    pub payload: Option<serde_json::Value>,
}

// ============================================================================
// Search Types
// ============================================================================

/// Request for vector search.
///
/// Only `vector` is required; every other field has a default.
#[derive(Debug, Deserialize, ToSchema)]
pub struct SearchRequest {
    /// Query vector.
    pub vector: Vec<f32>,
    /// Number of results to return. Defaults to 10.
    #[serde(default = "default_top_k")]
    pub top_k: usize,
    /// Search mode preset: fast, balanced, accurate, perfect.
    /// Each preset selects a predefined `ef_search` value.
    #[serde(default)]
    #[schema(example = "balanced")]
    pub mode: Option<String>,
    /// HNSW `ef_search` parameter (higher = better recall, slower).
    /// Takes precedence over `mode` if both are specified.
    #[serde(default)]
    #[schema(example = 128)]
    pub ef_search: Option<usize>,
    /// Query timeout in milliseconds.
    #[serde(default)]
    #[schema(example = 30000)]
    pub timeout_ms: Option<u64>,
    /// Optional metadata filter to apply to results (JSON object with condition).
    #[serde(default)]
    pub filter: Option<serde_json::Value>,
}

/// Request for batch vector search: several independent searches in one call.
#[derive(Debug, Deserialize, ToSchema)]
pub struct BatchSearchRequest {
    /// List of search requests.
    pub searches: Vec<SearchRequest>,
}

/// Serde fallback for the `top_k` field of the search request types when the
/// field is absent from the request body.
fn default_top_k() -> usize {
    const DEFAULT_TOP_K: usize = 10;
    DEFAULT_TOP_K
}

/// Translate a search-mode preset name into its `ef_search` value.
///
/// The name is lowercased before comparison, so matching is
/// case-insensitive. Surrounding whitespace is *not* trimmed.
///
/// Returns `None` for any name that is not one of the known presets
/// (`fast`, `balanced`, `accurate`, `perfect`).
#[must_use]
pub fn mode_to_ef_search(mode: &str) -> Option<usize> {
    // Preset table: (lowercase name, ef_search). `perfect` maps to the
    // largest representable value.
    const PRESETS: [(&str, usize); 4] = [
        ("fast", 64),
        ("balanced", 128),
        ("accurate", 256),
        ("perfect", usize::MAX),
    ];

    let wanted = mode.to_lowercase();
    PRESETS
        .iter()
        .find(|(name, _)| *name == wanted.as_str())
        .map(|&(_, ef_search)| ef_search)
}

/// Response from a single vector search.
#[derive(Debug, Serialize, ToSchema)]
pub struct SearchResponse {
    /// Search results.
    pub results: Vec<SearchResultResponse>,
}

/// Response from batch search.
#[derive(Debug, Serialize, ToSchema)]
pub struct BatchSearchResponse {
    /// Results for each search query.
    // NOTE(review): presumably in the same order as the request's
    // `searches` — confirm in the handler.
    pub results: Vec<SearchResponse>,
    /// Total time in milliseconds.
    pub timing_ms: f64,
}

/// A single search result (one matched point).
#[derive(Debug, Serialize, ToSchema)]
pub struct SearchResultResponse {
    /// Point ID.
    pub id: u64,
    /// Similarity score.
    // NOTE(review): whether higher or lower is better presumably depends on
    // the collection metric — confirm before documenting publicly.
    pub score: f32,
    /// Point payload. No `skip_serializing_if` is set, so an absent payload
    /// is serialized as `null`.
    pub payload: Option<serde_json::Value>,
}

/// Generic error response carrying a single human-readable message.
#[derive(Debug, Serialize, ToSchema)]
pub struct ErrorResponse {
    /// Error message.
    pub error: String,
}

/// Request for BM25 text search.
#[derive(Debug, Deserialize, ToSchema)]
pub struct TextSearchRequest {
    /// Text query for full-text search.
    #[schema(example = "rust programming")]
    pub query: String,
    /// Number of results to return. Defaults to 10.
    #[serde(default = "default_top_k")]
    #[schema(example = 10)]
    pub top_k: usize,
    /// Optional metadata filter to apply to results (JSON object with condition).
    #[serde(default)]
    pub filter: Option<serde_json::Value>,
}

/// Request for hybrid search (vector + text).
///
/// Both `vector` and `query` are required.
#[derive(Debug, Deserialize, ToSchema)]
pub struct HybridSearchRequest {
    /// Query vector for similarity search.
    pub vector: Vec<f32>,
    /// Text query for BM25 search.
    #[schema(example = "rust programming")]
    pub query: String,
    /// Number of results to return. Defaults to 10.
    #[serde(default = "default_top_k")]
    #[schema(example = 10)]
    pub top_k: usize,
    /// Weight for vector similarity (0.0-1.0). Text weight = 1 - `vector_weight`.
    /// Defaults to 0.5.
    // NOTE(review): the 0.0-1.0 range is not enforced by this type —
    // confirm that the handler validates or clamps it.
    #[serde(default = "default_vector_weight")]
    #[schema(example = 0.5)]
    pub vector_weight: f32,
    /// Optional metadata filter to apply to results (JSON object with condition).
    #[serde(default)]
    pub filter: Option<serde_json::Value>,
}

/// Serde fallback for `HybridSearchRequest::vector_weight` when the field is
/// absent from the request body.
fn default_vector_weight() -> f32 {
    const DEFAULT_VECTOR_WEIGHT: f32 = 0.5;
    DEFAULT_VECTOR_WEIGHT
}

/// Request for multi-query vector search with fusion.
///
/// Only `vectors` is required; all tuning fields have defaults.
#[derive(Debug, Deserialize, ToSchema)]
pub struct MultiQuerySearchRequest {
    /// List of query vectors.
    pub vectors: Vec<Vec<f32>>,
    /// Number of results to return. Defaults to 10.
    #[serde(default = "default_top_k")]
    #[schema(example = 10)]
    pub top_k: usize,
    /// Fusion strategy: "average", "maximum", "rrf", "weighted". Defaults to "rrf".
    #[serde(default = "default_fusion_strategy")]
    #[schema(example = "rrf")]
    pub strategy: String,
    /// RRF k parameter (only used when strategy = "rrf"). Defaults to 60.
    #[serde(default = "default_rrf_k")]
    #[schema(example = 60)]
    pub rrf_k: u32,
    /// Weighted fusion: weight for average score component (default 0.5).
    #[serde(default = "default_avg_weight")]
    #[schema(example = 0.5)]
    pub avg_weight: f32,
    /// Weighted fusion: weight for max score component (default 0.3).
    #[serde(default = "default_max_weight")]
    #[schema(example = 0.3)]
    pub max_weight: f32,
    /// Weighted fusion: weight for hit count component (default 0.2).
    #[serde(default = "default_hit_weight")]
    #[schema(example = 0.2)]
    pub hit_weight: f32,
    /// Optional metadata filter to apply to results.
    #[serde(default)]
    pub filter: Option<serde_json::Value>,
}

/// Serde fallback for `MultiQuerySearchRequest::strategy` when the field is
/// absent from the request body.
fn default_fusion_strategy() -> String {
    String::from("rrf")
}

/// Serde fallback for `MultiQuerySearchRequest::rrf_k` when the field is
/// absent from the request body.
fn default_rrf_k() -> u32 {
    const DEFAULT_RRF_K: u32 = 60;
    DEFAULT_RRF_K
}

/// Serde fallback for `MultiQuerySearchRequest::avg_weight` when the field is
/// absent from the request body.
fn default_avg_weight() -> f32 {
    const DEFAULT_AVG_WEIGHT: f32 = 0.5;
    DEFAULT_AVG_WEIGHT
}

/// Serde fallback for `MultiQuerySearchRequest::max_weight` when the field is
/// absent from the request body.
fn default_max_weight() -> f32 {
    const DEFAULT_MAX_WEIGHT: f32 = 0.3;
    DEFAULT_MAX_WEIGHT
}

/// Serde fallback for `MultiQuerySearchRequest::hit_weight` when the field is
/// absent from the request body.
fn default_hit_weight() -> f32 {
    const DEFAULT_HIT_WEIGHT: f32 = 0.2;
    DEFAULT_HIT_WEIGHT
}

// ============================================================================
// Query Types
// ============================================================================

/// Request for `VelesQL` query execution.
#[derive(Debug, Deserialize, ToSchema)]
pub struct QueryRequest {
    /// The `VelesQL` query string.
    pub query: String,
    /// Named parameters for the query. Defaults to an empty map when omitted.
    #[serde(default)]
    pub params: std::collections::HashMap<String, serde_json::Value>,
    /// Optional collection name.
    /// Required for top-level MATCH queries executed via `/query`.
    #[serde(default)]
    pub collection: Option<String>,
}

/// Query type reported by the unified `/query` endpoint.
///
/// Serialized in lowercase: `search`, `aggregation`, `rows`, `graph`.
// Tracker reference: EPIC-052 US-006.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "lowercase")]
pub enum QueryType {
    /// Vector similarity search (contains `similarity()` or NEAR).
    Search,
    /// Aggregation query (GROUP BY, COUNT, SUM, etc.).
    Aggregation,
    /// Simple SELECT returning rows.
    Rows,
    /// Graph pattern matching (MATCH clause).
    Graph,
}

/// Response from `VelesQL` query execution.
#[derive(Debug, Serialize, ToSchema)]
pub struct QueryResponse {
    /// Query results.
    pub results: Vec<SearchResultResponse>,
    /// Query execution time in milliseconds.
    pub timing_ms: f64,
    /// Query execution time in whole milliseconds (compat helper for API clients).
    // NOTE(review): how `timing_ms` is converted (truncation vs rounding) is
    // decided by the handler, not by this type — confirm.
    pub took_ms: u64,
    /// Number of rows returned.
    pub rows_returned: usize,
    /// Query response metadata (contracted fields for SDK parity).
    pub meta: QueryResponseMeta,
}

/// Metadata section for `VelesQL` query responses.
#[derive(Debug, Serialize, ToSchema)]
pub struct QueryResponseMeta {
    /// `VelesQL` contract version used by this response.
    pub velesql_contract_version: String,
    /// Number of rows in `results`.
    pub count: usize,
}

/// Unified response from the `/query` endpoint.
// Tracker reference: EPIC-052 US-006.
#[derive(Debug, Serialize, ToSchema)]
pub struct UnifiedQueryResponse {
    /// Type of query executed. Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub query_type: QueryType,
    /// Number of results.
    pub count: usize,
    /// Execution time in milliseconds.
    pub timing_ms: f64,
    /// Results (structure depends on `query_type`).
    pub results: serde_json::Value,
    /// Optional warnings (e.g., truncated, deprecated).
    /// The key is omitted from the JSON output when there are no warnings.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub warnings: Vec<String>,
}

/// Response from `VelesQL` aggregation query execution.
// Tracker reference: introduced as part of the BUG-1 fix.
#[derive(Debug, Serialize, ToSchema)]
pub struct AggregationResponse {
    /// Aggregation results (JSON object or array for GROUP BY).
    pub result: serde_json::Value,
    /// Query execution time in milliseconds.
    pub timing_ms: f64,
}

/// `VelesQL` query error response: an envelope with the detail nested under
/// the `error` key.
#[derive(Debug, Serialize, ToSchema)]
pub struct QueryErrorResponse {
    /// Error details.
    pub error: QueryErrorDetail,
}

// ============================================================================
// EXPLAIN Types (EPIC-058 US-002)
// ============================================================================

/// Request for query EXPLAIN.
#[derive(Debug, Deserialize, ToSchema)]
pub struct ExplainRequest {
    /// The `VelesQL` query string to explain.
    #[schema(example = "SELECT * FROM docs WHERE category = 'tech' AND vector NEAR $v LIMIT 10")]
    pub query: String,
    /// Named parameters for the query (optional, used for validation).
    /// Defaults to an empty map when omitted.
    #[serde(default)]
    pub params: std::collections::HashMap<String, serde_json::Value>,
}

/// Response from query EXPLAIN: the plan, cost estimate and detected
/// features for a query.
#[derive(Debug, Serialize, ToSchema)]
pub struct ExplainResponse {
    /// The original query.
    pub query: String,
    /// Query type (SELECT, MATCH, etc.).
    // Free-form string; this is not the `QueryType` enum.
    pub query_type: String,
    /// Target collection name.
    pub collection: String,
    /// Query plan steps.
    pub plan: Vec<ExplainStep>,
    /// Estimated cost metrics.
    pub estimated_cost: ExplainCost,
    /// Query features detected.
    pub features: ExplainFeatures,
}

/// A step in the query execution plan.
#[derive(Debug, Serialize, ToSchema)]
pub struct ExplainStep {
    /// Step number (1-indexed).
    pub step: usize,
    /// Operation type (e.g., "VectorSearch", "Filter", "Sort").
    pub operation: String,
    /// Description of what this step does.
    pub description: String,
    /// Estimated rows processed/produced. Serialized as `null` when no
    /// estimate is provided.
    pub estimated_rows: Option<usize>,
}

/// Estimated cost metrics for the query.
#[derive(Debug, Serialize, ToSchema)]
pub struct ExplainCost {
    /// Whether an index can be used.
    pub uses_index: bool,
    /// Index name if used.
    pub index_name: Option<String>,
    /// Estimated selectivity (0.0 - 1.0).
    pub selectivity: f64,
    /// Estimated complexity class.
    // NOTE(review): the format is not defined here; presumably a notation
    // such as "O(log n)" — confirm against the planner.
    pub complexity: String,
}

/// Features detected in the query.
// Each flag reports one independent query feature, hence the lint allowance
// for a struct with many bools.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Serialize, ToSchema)]
pub struct ExplainFeatures {
    /// Has vector search (NEAR clause).
    pub has_vector_search: bool,
    /// Has metadata filter (WHERE without NEAR).
    pub has_filter: bool,
    /// Has ORDER BY clause.
    pub has_order_by: bool,
    /// Has GROUP BY clause.
    pub has_group_by: bool,
    /// Has aggregation functions.
    pub has_aggregation: bool,
    /// Has JOIN clause.
    pub has_join: bool,
    /// Has FUSION clause.
    pub has_fusion: bool,
    /// LIMIT value if present.
    pub limit: Option<u64>,
    /// OFFSET value if present.
    pub offset: Option<u64>,
}

/// `VelesQL` query error detail.
#[derive(Debug, Serialize, ToSchema)]
pub struct QueryErrorDetail {
    /// Error type. Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub error_type: String,
    /// Error message.
    pub message: String,
    /// Position in query where error occurred.
    // NOTE(review): unit (byte vs character offset) and base (0 vs 1) are
    // not established in this module — confirm against the parser.
    pub position: usize,
    /// Fragment of query around error.
    pub query: String,
}

/// Standardized `VelesQL` semantic/runtime error payload.
#[derive(Debug, Serialize, ToSchema)]
pub struct VelesqlErrorDetail {
    /// Stable machine-readable error code.
    pub code: String,
    /// Human-readable error message.
    pub message: String,
    /// Actionable hint for developers.
    pub hint: String,
    /// Optional additional details.
    /// The key is omitted from the JSON output when no details are set.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub details: Option<serde_json::Value>,
}

/// Standardized `VelesQL` semantic/runtime error response: an envelope with
/// the detail nested under the `error` key.
#[derive(Debug, Serialize, ToSchema)]
pub struct VelesqlErrorResponse {
    /// Error details.
    pub error: VelesqlErrorDetail,
}

// ============================================================================
// Index Management Types (EPIC-009)
// ============================================================================

/// Request to create a property index.
#[derive(Debug, Deserialize, ToSchema)]
pub struct CreateIndexRequest {
    /// Node label to index (e.g., "Person").
    #[schema(example = "Person")]
    pub label: String,
    /// Property name to index (e.g., "email").
    #[schema(example = "email")]
    pub property: String,
    /// Index type: "hash" (equality O(1)) or "range" (range queries O(log n)).
    /// Defaults to "hash".
    #[serde(default = "default_index_type")]
    #[schema(example = "hash")]
    pub index_type: String,
}

/// Serde fallback for `CreateIndexRequest::index_type` when the field is
/// absent from the request body.
fn default_index_type() -> String {
    String::from("hash")
}

/// Response describing a single property index.
#[derive(Debug, Serialize, ToSchema)]
pub struct IndexResponse {
    /// Node label.
    pub label: String,
    /// Property name.
    pub property: String,
    /// Index type (hash or range).
    pub index_type: String,
    /// Number of unique values indexed.
    pub cardinality: usize,
    /// Memory usage in bytes.
    pub memory_bytes: usize,
}

/// Response listing all indexes.
#[derive(Debug, Serialize, ToSchema)]
pub struct ListIndexesResponse {
    /// List of indexes.
    pub indexes: Vec<IndexResponse>,
    /// Total number of indexes.
    // NOTE(review): presumably equal to `indexes.len()` — confirm in the handler.
    pub total: usize,
}