kql-language-tools 0.1.0

Rust bindings to Kusto.Language for KQL validation and language services
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
//! Safe Rust wrappers for KQL validation
//!
//! This module provides the high-level API for validating KQL queries.

use crate::error::Error;
use crate::ffi::{return_codes, DEFAULT_BUFFER_SIZE, MAX_BUFFER_SIZE};
use crate::loader::{self, LoadedLibrary};
use crate::schema::Schema;
use crate::types::ValidationResult;
use std::ffi::c_int;

/// KQL query validator
///
/// This is the main entry point for validating KQL queries. It holds a
/// handle to the loaded native library and provides safe wrappers
/// around the FFI functions.
///
/// Besides validation, the same handle is used for syntax classification
/// ([`KqlValidator::get_classifications`]) and completion
/// ([`KqlValidator::get_completions`]); both return an error when the
/// loaded library does not expose the corresponding entry point.
///
/// # Example
///
/// ```no_run
/// use kql_language_tools::{KqlValidator, Schema, Table};
///
/// fn main() -> Result<(), kql_language_tools::Error> {
///     let validator = KqlValidator::new()?;
///
///     // Syntax-only validation
///     let result = validator.validate_syntax("SecurityEvent | take 10")?;
///     assert!(result.is_valid());
///
///     // With schema
///     let schema = Schema::new()
///         .table(Table::new("SecurityEvent")
///             .with_column("TimeGenerated", "datetime")
///             .with_column("Account", "string"));
///     let result = validator.validate_with_schema(
///         "SecurityEvent | project TimeGenerated, Account",
///         &schema
///     )?;
///     Ok(())
/// }
/// ```
pub struct KqlValidator {
    /// Native library handle returned by `loader::load_library`; every FFI
    /// entry point used by this type is reached through it.
    lib: &'static LoadedLibrary,
}

impl KqlValidator {
    /// Build a validator backed by the native KQL library
    ///
    /// Obtains the library handle from `loader::load_library` and stores it
    /// in the new instance.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by `loader::load_library`, for example
    /// when the native library cannot be found or fails to load.
    pub fn new() -> Result<Self, Error> {
        Ok(Self {
            lib: loader::load_library()?,
        })
    }

    /// Validate a KQL query for syntax errors only
    ///
    /// This performs syntax-only validation without any schema awareness.
    /// It will catch basic syntax errors but won't validate table or
    /// column names.
    ///
    /// # Arguments
    ///
    /// * `query` - The KQL query string to validate
    ///
    /// # Returns
    ///
    /// A `ValidationResult` containing any diagnostics found.
    pub fn validate_syntax(&self, query: &str) -> Result<ValidationResult, Error> {
        let query_bytes = query.as_bytes();

        // Validate input size fits in c_int (2GB limit on 32-bit)
        let query_len = c_int::try_from(query_bytes.len()).map_err(|_| Error::Internal {
            message: format!(
                "Query too large: {} bytes exceeds c_int max",
                query_bytes.len()
            ),
        })?;

        self.call_ffi_with_retry(|buffer| {
            // SAFETY: This FFI call is safe because:
            // 1. query_bytes.as_ptr() points to valid UTF-8 data for the duration of the call
            // 2. query_len accurately represents the byte length
            // 3. buffer is a valid mutable slice we own
            // 4. The FFI function only reads from query and writes to buffer
            #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
            unsafe {
                (self.lib.validate_syntax)(
                    query_bytes.as_ptr(),
                    query_len,
                    buffer.as_mut_ptr(),
                    buffer.len() as c_int,
                )
            }
        })
    }

    /// Validate a KQL query with schema awareness
    ///
    /// This performs full semantic validation using the provided schema.
    /// It validates that referenced tables and columns exist and have
    /// the correct types.
    ///
    /// # Arguments
    ///
    /// * `query` - The KQL query string to validate
    /// * `schema` - The database schema to validate against
    ///
    /// # Returns
    ///
    /// A `ValidationResult` containing any diagnostics found.
    ///
    /// # Errors
    ///
    /// Returns an error if schema validation is not supported by the
    /// loaded library.
    pub fn validate_with_schema(
        &self,
        query: &str,
        schema: &Schema,
    ) -> Result<ValidationResult, Error> {
        let validate_fn = self
            .lib
            .validate_with_schema
            .ok_or_else(|| Error::Internal {
                message: "Schema validation not supported by loaded library".to_string(),
            })?;

        let query_bytes = query.as_bytes();
        let schema_json = serde_json::to_string(schema)?;
        let schema_bytes = schema_json.as_bytes();

        // Validate input sizes fit in c_int
        let query_len = c_int::try_from(query_bytes.len()).map_err(|_| Error::Internal {
            message: format!("Query too large: {} bytes", query_bytes.len()),
        })?;
        let schema_len = c_int::try_from(schema_bytes.len()).map_err(|_| Error::Internal {
            message: format!("Schema too large: {} bytes", schema_bytes.len()),
        })?;

        self.call_ffi_with_retry(|buffer| {
            // SAFETY: See validate_syntax for safety invariants.
            // Additionally, schema_bytes is valid UTF-8 JSON for the call duration.
            #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
            unsafe {
                validate_fn(
                    query_bytes.as_ptr(),
                    query_len,
                    schema_bytes.as_ptr(),
                    schema_len,
                    buffer.as_mut_ptr(),
                    buffer.len() as c_int,
                )
            }
        })
    }

    /// Check if schema validation is supported
    ///
    /// Forwards to `LoadedLibrary::supports_schema_validation` on the loaded
    /// library handle. Presumably this reflects whether the optional
    /// `validate_with_schema` entry point was resolved — verify against
    /// `LoadedLibrary`.
    #[must_use]
    pub fn supports_schema_validation(&self) -> bool {
        self.lib.supports_schema_validation()
    }

    /// Check if completion is supported
    ///
    /// Forwards to `LoadedLibrary::supports_completion` on the loaded library
    /// handle. Presumably this reflects whether the optional
    /// `get_completions` entry point was resolved — verify against
    /// `LoadedLibrary`.
    #[must_use]
    pub fn supports_completion(&self) -> bool {
        self.lib.supports_completion()
    }

    /// Check if classification is supported
    ///
    /// Forwards to `LoadedLibrary::supports_classification` on the loaded
    /// library handle. Presumably this reflects whether the optional
    /// `get_classifications` entry point was resolved — verify against
    /// `LoadedLibrary`.
    #[must_use]
    pub fn supports_classification(&self) -> bool {
        self.lib.supports_classification()
    }

    /// Get syntax classifications for a KQL query (for syntax highlighting)
    ///
    /// Returns a list of classified spans that can be used to highlight
    /// different parts of the query (keywords, operators, identifiers, etc.)
    ///
    /// # Arguments
    ///
    /// * `query` - The KQL query string to classify
    ///
    /// # Returns
    ///
    /// A `ClassificationResult` containing spans with their classification kinds.
    ///
    /// # Errors
    ///
    /// Returns an error if classification is not supported by the loaded library.
    pub fn get_classifications(
        &self,
        query: &str,
    ) -> Result<crate::classification::ClassificationResult, Error> {
        let classify_fn = self
            .lib
            .get_classifications
            .ok_or_else(|| Error::Internal {
                message: "Classification not supported by loaded library".to_string(),
            })?;

        let query_bytes = query.as_bytes();
        let query_len = c_int::try_from(query_bytes.len()).map_err(|_| Error::Internal {
            message: format!("Query too large: {} bytes", query_bytes.len()),
        })?;

        self.call_ffi_json(|buffer| {
            // SAFETY: See validate_syntax for safety invariants.
            #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
            unsafe {
                classify_fn(
                    query_bytes.as_ptr(),
                    query_len,
                    buffer.as_mut_ptr(),
                    buffer.len() as c_int,
                )
            }
        })
    }

    /// Get completion suggestions at a cursor position
    ///
    /// Returns completion items (keywords, functions, tables, columns, etc.)
    /// that are valid at the given cursor position.
    ///
    /// # Arguments
    ///
    /// * `query` - The KQL query string
    /// * `cursor_position` - Cursor position (0-based character offset)
    /// * `schema` - Optional schema for context-aware completions
    ///
    /// # Returns
    ///
    /// A `CompletionResult` containing completion items.
    ///
    /// # Errors
    ///
    /// Returns an error if completion is not supported by the loaded library.
    pub fn get_completions(
        &self,
        query: &str,
        cursor_position: usize,
        schema: Option<&Schema>,
    ) -> Result<crate::completion::CompletionResult, Error> {
        let completions_fn = self.lib.get_completions.ok_or_else(|| Error::Internal {
            message: "Completion not supported by loaded library".to_string(),
        })?;

        let query_bytes = query.as_bytes();
        let schema_json = schema.map(serde_json::to_string).transpose()?;

        // Validate sizes fit in c_int
        let query_len = c_int::try_from(query_bytes.len()).map_err(|_| Error::Internal {
            message: format!("Query too large: {} bytes", query_bytes.len()),
        })?;
        let cursor_pos = c_int::try_from(cursor_position).map_err(|_| Error::Internal {
            message: format!("Cursor position too large: {cursor_position}"),
        })?;

        self.call_ffi_json(|buffer| {
            // SAFETY: See validate_syntax for safety invariants.
            // schema_ptr may be null (handled by FFI), schema_len is 0 in that case.
            #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
            unsafe {
                let (schema_ptr, schema_len) = match &schema_json {
                    Some(json) => (json.as_ptr(), json.len() as c_int),
                    None => (std::ptr::null(), 0),
                };

                completions_fn(
                    query_bytes.as_ptr(),
                    query_len,
                    cursor_pos,
                    schema_ptr,
                    schema_len,
                    buffer.as_mut_ptr(),
                    buffer.len() as c_int,
                )
            }
        })
    }

    /// Call an FFI function with automatic buffer retry on overflow
    #[allow(clippy::cast_sign_loss)]
    fn call_ffi_with_retry<F>(&self, mut ffi_call: F) -> Result<ValidationResult, Error>
    where
        F: FnMut(&mut Vec<u8>) -> c_int,
    {
        let mut buffer = vec![0u8; DEFAULT_BUFFER_SIZE];
        let mut result = ffi_call(&mut buffer);

        // Handle buffer too small - retry with larger buffer
        if return_codes::is_buffer_too_small(result) {
            // Double the buffer size and retry
            let new_size = buffer.len() * 2;
            if new_size > MAX_BUFFER_SIZE {
                return Err(Error::BufferTooSmall {
                    needed: new_size,
                    available: MAX_BUFFER_SIZE,
                });
            }
            buffer.resize(new_size, 0);
            result = ffi_call(&mut buffer);

            // If still too small, give up
            if return_codes::is_buffer_too_small(result) {
                return Err(Error::BufferTooSmall {
                    needed: 0, // Unknown
                    available: buffer.len(),
                });
            }
        }

        // Check for other errors
        if !return_codes::is_success(result) {
            let error_msg = self.get_last_error().unwrap_or_default();
            return Err(Error::from_native_code(result, &error_msg));
        }

        // Parse JSON result
        if result == 0 {
            // Empty result means valid query
            return Ok(ValidationResult::valid());
        }

        let json_len = result as usize;
        let json_str = std::str::from_utf8(&buffer[..json_len])?;

        log::trace!("FFI returned JSON: {json_str}");

        let validation_result: ValidationResult = serde_json::from_str(json_str)?;
        Ok(validation_result)
    }

    /// Call an FFI function and deserialize JSON result to a generic type
    #[allow(clippy::cast_sign_loss)]
    fn call_ffi_json<T, F>(&self, mut ffi_call: F) -> Result<T, Error>
    where
        T: for<'de> serde::Deserialize<'de> + Default,
        F: FnMut(&mut Vec<u8>) -> c_int,
    {
        let mut buffer = vec![0u8; DEFAULT_BUFFER_SIZE];
        let mut result = ffi_call(&mut buffer);

        // Handle buffer too small - retry with larger buffer
        if return_codes::is_buffer_too_small(result) {
            let new_size = buffer.len() * 2;
            if new_size > MAX_BUFFER_SIZE {
                return Err(Error::BufferTooSmall {
                    needed: new_size,
                    available: MAX_BUFFER_SIZE,
                });
            }
            buffer.resize(new_size, 0);
            result = ffi_call(&mut buffer);

            if return_codes::is_buffer_too_small(result) {
                return Err(Error::BufferTooSmall {
                    needed: 0,
                    available: buffer.len(),
                });
            }
        }

        // Check for errors
        if !return_codes::is_success(result) {
            let error_msg = self.get_last_error().unwrap_or_default();
            return Err(Error::from_native_code(result, &error_msg));
        }

        // Parse JSON result
        if result == 0 {
            return Ok(T::default());
        }

        let json_len = result as usize;
        let json_str = std::str::from_utf8(&buffer[..json_len])?;

        log::trace!("FFI returned JSON: {json_str}");

        let parsed_result: T = serde_json::from_str(json_str)?;
        Ok(parsed_result)
    }

    /// Get the last error message from the native library
    ///
    /// Asks the native library to write its last error message into a
    /// 1024-byte buffer and returns it as an owned string.
    ///
    /// Returns `None` when the native call reports a failure status, reports
    /// a length that is zero, negative, or larger than the buffer, or when the
    /// written bytes are not valid UTF-8.
    fn get_last_error(&self) -> Option<String> {
        let mut buffer = vec![0u8; 1024];
        let capacity = c_int::try_from(buffer.len()).ok()?;

        // SAFETY: `buffer` is a live, exclusively owned allocation of exactly
        // `capacity` bytes for the duration of the call. Relies on the native
        // function writing no more than `capacity` bytes to it.
        let result = unsafe { (self.lib.get_last_error)(buffer.as_mut_ptr(), capacity) };

        if !return_codes::is_success(result) {
            return None;
        }

        // The length comes from native code; anything outside 1..=buffer.len()
        // is rejected here rather than panicking on an out-of-range slice.
        let len = usize::try_from(result).ok()?;
        if len == 0 || len > buffer.len() {
            return None;
        }

        // Reuse the allocation instead of copying the prefix into a new Vec.
        buffer.truncate(len);
        String::from_utf8(buffer).ok()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // These tests require the native library to be available
    // They are ignored by default and can be run with:
    // cargo test --features test-native -- --ignored

    /// Syntax-only validation accepts a simple `take` pipeline.
    #[test]
    #[ignore = "requires native library"]
    fn test_validate_syntax_valid() {
        let kql = "T | take 10";
        let outcome = KqlValidator::new()
            .expect("Failed to create validator")
            .validate_syntax(kql)
            .expect("Validation failed");
        assert!(outcome.is_valid());
    }

    /// Syntax-only validation reports errors for an unknown operator.
    #[test]
    #[ignore = "requires native library"]
    fn test_validate_syntax_invalid() {
        let kql = "T | invalid_operator";
        let outcome = KqlValidator::new()
            .expect("Failed to create validator")
            .validate_syntax(kql)
            .expect("Validation failed");
        assert!(!outcome.is_valid());
        assert!(outcome.has_errors());
    }

    /// Projecting columns declared in the schema validates cleanly.
    #[test]
    #[ignore = "requires native library"]
    fn test_validate_with_schema() {
        let validator = KqlValidator::new().expect("Failed to create validator");

        let security_event = crate::schema::Table::new("SecurityEvent")
            .with_column("TimeGenerated", "datetime")
            .with_column("Account", "string");
        let schema = Schema::new().table(security_event);

        let kql = "SecurityEvent | project TimeGenerated, Account";
        let outcome = validator
            .validate_with_schema(kql, &schema)
            .expect("Validation failed");
        assert!(outcome.is_valid());
    }

    /// Projecting a column missing from the schema is reported as invalid.
    #[test]
    #[ignore = "requires native library"]
    fn test_validate_with_schema_unknown_column() {
        let validator = KqlValidator::new().expect("Failed to create validator");

        let security_event =
            crate::schema::Table::new("SecurityEvent").with_column("TimeGenerated", "datetime");
        let schema = Schema::new().table(security_event);

        let kql = "SecurityEvent | project UnknownColumn";
        let outcome = validator
            .validate_with_schema(kql, &schema)
            .expect("Validation failed");
        assert!(!outcome.is_valid());
    }

    /// Classification of a multi-stage query yields at least one span.
    #[test]
    #[ignore = "requires native library"]
    fn test_get_classifications() {
        let kql = "SecurityEvent | where TimeGenerated > ago(1h) | take 10";
        let classified = KqlValidator::new()
            .expect("Failed to create validator")
            .get_classifications(kql)
            .expect("Classification failed");

        // The query has several tokens, so the span list must not be empty
        let spans = &classified.spans;
        assert!(!spans.is_empty(), "Expected classification spans");

        // Dump every span to aid debugging when run with --nocapture
        for entry in spans {
            println!(
                "Span: start={}, length={}, kind={:?}",
                entry.start, entry.length, entry.kind
            );
        }
    }

    /// Completions requested right after a pipe include common operators.
    #[test]
    #[ignore = "requires native library"]
    fn test_get_completions_after_pipe() {
        let validator = KqlValidator::new().expect("Failed to create validator");

        // Place the cursor at the very end, just past the pipe operator
        let query = "SecurityEvent | ";
        let cursor_pos = query.len();

        let completions = validator
            .get_completions(query, cursor_pos, None)
            .expect("Completion failed");
        let items = &completions.items;

        // Operators such as where/project are expected here
        assert!(!items.is_empty(), "Expected completion items");

        // Dump every item to aid debugging when run with --nocapture
        println!("Completions at position {cursor_pos} in '{query}':");
        for entry in items {
            println!(
                "  {} ({:?}) - edit_start: {}",
                entry.label, entry.kind, entry.edit_start
            );
        }

        // At least one label must mention a common operator
        let mentions_common_operator = items
            .iter()
            .map(|entry| entry.label.as_str())
            .any(|label| label.contains("where") || label.contains("project"));
        assert!(
            mentions_common_operator,
            "Expected 'where' or 'project' in completions"
        );
    }

    /// Completions requested after `project` with a schema are non-empty.
    #[test]
    #[ignore = "requires native library"]
    fn test_get_completions_with_schema() {
        let validator = KqlValidator::new().expect("Failed to create validator");

        let security_event = crate::schema::Table::new("SecurityEvent")
            .with_column("TimeGenerated", "datetime")
            .with_column("Account", "string")
            .with_column("Computer", "string");
        let schema = Schema::new().table(security_event);

        // Cursor sits right after 'project ', where column names are expected
        let query = "SecurityEvent | project ";
        let cursor_pos = query.len();

        let completions = validator
            .get_completions(query, cursor_pos, Some(&schema))
            .expect("Completion failed");
        let items = &completions.items;

        assert!(!items.is_empty(), "Expected completion items");

        // Dump every item to aid debugging when run with --nocapture
        println!("Completions with schema at position {cursor_pos} in '{query}':");
        for entry in items {
            println!(
                "  {} ({:?}) - detail: {:?}",
                entry.label, entry.kind, entry.detail
            );
        }
    }
}