Skip to main content

sqry_nl/
lib.rs

1//! # sqry-nl: Natural Language Translation Layer for sqry
2//!
3//! This crate provides translation from natural language queries to sqry commands.
4//! It uses a MiniLM-L6-v2-based intent classifier combined with regex-based entity
5//! extraction to produce validated, safe sqry commands.
6//!
7//! ## Architecture
8//!
9//! The translation pipeline consists of:
10//!
11//! 1. **Preprocessor** - Unicode normalization, homoglyph detection, input sanitization
12//! 2. **Entity Extractor** - Regex-based slot filling for symbols, languages, etc.
13//! 3. **Intent Classifier** - MiniLM-L6-v2 ONNX model for intent classification
14//! 4. **Template Assembler** - Maps (intent, entities) to sqry command templates
15//! 5. **Safety Validator** - Whitelist validation, metachar rejection, path guards
16//! 6. **Translation Cache** - LRU cache for repeated queries
17//!
18//! ## Example
19//!
20//! ```rust,ignore
21//! use sqry_nl::{Translator, TranslatorConfig, TranslationResponse};
22//!
23//! let config = TranslatorConfig::default();
24//! let translator = Translator::load(config)?;
25//!
26//! match translator.translate("find authentication functions in rust") {
27//!     TranslationResponse::Execute { command, confidence, .. } => {
28//!         println!("Command: {} (confidence: {:.1}%)", command, confidence * 100.0);
29//!         // Execute the command via sqry CLI
30//!     }
31//!     TranslationResponse::Confirm { command, prompt, .. } => {
32//!         println!("{}", prompt);
33//!         // Ask user for confirmation
34//!     }
35//!     TranslationResponse::Disambiguate { options, prompt } => {
36//!         println!("{}", prompt);
37//!         // Present options to user
38//!     }
39//!     TranslationResponse::Reject { reason, suggestions } => {
40//!         eprintln!("Cannot translate: {}", reason);
41//!         // Show suggestions
42//!     }
43//! }
44//! ```
45//!
46//! ## Safety
47//!
48//! All generated commands are validated against a strict whitelist of allowed
49//! command templates. The following are always rejected:
50//!
51//! - Shell metacharacters (`;`, `|`, `&`, `$`, etc.)
52//! - Environment variable expansion (`$HOME`, `${VAR}`)
53//! - Path traversal (`..`, absolute paths)
54//! - Write-mode operations (`--force`, `repair`, `prune`)
55//!
56//! ## Feature Flags
57//!
58//! - `classifier` (default) - Enable the MiniLM-L6-v2 classifier. Requires ONNX Runtime.
59//!   Disable for minimal builds that only need rule-based classification.
60
// Public modules
//
// Each `pub mod` below declares a public submodule whose body lives outside
// this file.
//
// NOTE(review): `classifier` is declared unconditionally here, while the crate
// docs describe a `classifier` feature that can be disabled — confirm the
// module gates its ONNX-dependent parts internally.
pub mod assembler;
pub mod cache;
pub mod classifier;
pub mod error;
pub mod extractor;
pub mod preprocess;
pub mod translator;
pub mod types;
pub mod validator;
71
// Re-exports for convenience
//
// Surfaces commonly used items at the crate root so callers can write
// `sqry_nl::Translator` rather than `sqry_nl::translator::Translator`.
pub use cache::{CacheConfig, CacheStats};
// Only re-exported when the `classifier` feature is enabled.
#[cfg(feature = "classifier")]
pub use classifier::onnx_runtime_install_hint;
pub use error::{NlError, NlResult};
pub use translator::{Translator, TranslatorConfig};
pub use types::{
    DisambiguationOption, ExtractedEntities, Intent, OutputFormat, SymbolKind, TranslationResponse,
    ValidationStatus,
};
82
/// Crate version for model compatibility checks.
///
/// Expanded at compile time by `env!` from the `CARGO_PKG_VERSION` environment
/// variable, which Cargo sets to the package version when building the crate.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
85
#[cfg(test)]
mod tests {
    use super::*;

    /// Compiles only when `T` is both `Send` and `Sync`; calling it is a no-op.
    fn require_send_sync<T: Send + Sync>() {}

    /// The core public types must be usable across threads. The check is made
    /// entirely by the type system: if any type loses `Send` or `Sync`, this
    /// test stops compiling.
    #[test]
    fn test_types_are_send_sync() {
        require_send_sync::<Intent>();
        require_send_sync::<ValidationStatus>();
        require_send_sync::<SymbolKind>();
        require_send_sync::<OutputFormat>();
        require_send_sync::<ExtractedEntities>();
        require_send_sync::<TranslationResponse>();
    }

    /// The exposed package version should look like a dotted version string.
    /// `VERSION` comes from Cargo and is never empty, so only the presence of
    /// a `.` separator is checked.
    #[test]
    fn test_version_available() {
        let has_dot_separator = VERSION.contains('.');
        assert!(has_dot_separator, "VERSION should contain a dot separator");
    }
}
115            VERSION.contains('.'),
116            "VERSION should contain a dot separator"
117        );
118    }
119}