sqry_nl/lib.rs
1//! # sqry-nl: Natural Language Translation Layer for sqry
2//!
3//! This crate provides translation from natural language queries to sqry commands.
4//! It uses a MiniLM-L6-v2-based intent classifier combined with regex-based entity
5//! extraction to produce validated, safe sqry commands.
6//!
7//! ## Architecture
8//!
9//! The translation pipeline consists of:
10//!
11//! 1. **Preprocessor** - Unicode normalization, homoglyph detection, input sanitization
12//! 2. **Entity Extractor** - Regex-based slot filling for symbols, languages, etc.
13//! 3. **Intent Classifier** - MiniLM-L6-v2 ONNX model for intent classification
14//! 4. **Template Assembler** - Maps (intent, entities) to sqry command templates
15//! 5. **Safety Validator** - Whitelist validation, shell metacharacter rejection, path guards
16//! 6. **Translation Cache** - LRU cache for repeated queries
17//!
18//! ## Example
19//!
20//! ```rust,ignore
21//! use sqry_nl::{Translator, TranslatorConfig, TranslationResponse};
22//!
23//! let config = TranslatorConfig::default();
24//! let translator = Translator::load(config)?;
25//!
26//! match translator.translate("find authentication functions in rust") {
27//! TranslationResponse::Execute { command, confidence, .. } => {
28//! println!("Command: {} (confidence: {:.1}%)", command, confidence * 100.0);
29//! // Execute the command via sqry CLI
30//! }
31//! TranslationResponse::Confirm { command, prompt, .. } => {
32//! println!("{}", prompt);
33//! // Ask user for confirmation
34//! }
35//! TranslationResponse::Disambiguate { options, prompt } => {
36//! println!("{}", prompt);
37//! // Present options to user
38//! }
39//! TranslationResponse::Reject { reason, suggestions } => {
40//! eprintln!("Cannot translate: {}", reason);
41//! // Show suggestions
42//! }
43//! }
44//! ```
45//!
46//! ## Safety
47//!
48//! All generated commands are validated against a strict whitelist of allowed
49//! command templates. The following are always rejected:
50//!
51//! - Shell metacharacters (`;`, `|`, `&`, `$`, etc.)
52//! - Environment variable expansion (`$HOME`, `${VAR}`)
53//! - Path traversal (`..`, absolute paths)
54//! - Write-mode operations (`--force`, `repair`, `prune`)
55//!
56//! ## Feature Flags
57//!
58//! - `classifier` (default) - Enable the MiniLM-L6-v2 classifier. Requires ONNX Runtime.
59//! Disable for minimal builds that only need rule-based classification.
60
61// Public modules
62pub mod assembler;
63pub mod cache;
64pub mod classifier;
65pub mod error;
66pub mod extractor;
67pub mod preprocess;
68pub mod translator;
69pub mod types;
70pub mod validator;
71
72// Re-exports for convenience
73pub use cache::{CacheConfig, CacheStats};
74#[cfg(feature = "classifier")]
75pub use classifier::onnx_runtime_install_hint;
76pub use error::{NlError, NlResult};
77pub use translator::{Translator, TranslatorConfig};
78pub use types::{
79 DisambiguationOption, ExtractedEntities, Intent, OutputFormat, SymbolKind, TranslationResponse,
80 ValidationStatus,
81};
82
83/// Crate version (read from `CARGO_PKG_VERSION` at compile time) for model compatibility checks
84pub const VERSION: &str = env!("CARGO_PKG_VERSION");
85
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile-time probe: naming this function for a type `T` only
    /// type-checks when `T` implements both `Send` and `Sync`.
    fn require_send_and_sync<T: Send + Sync>() {}

    /// The core public types must be usable across threads. The checks are
    /// enforced by the compiler; the calls themselves do nothing at runtime.
    #[test]
    fn test_types_are_send_sync() {
        require_send_and_sync::<Intent>();
        require_send_and_sync::<ValidationStatus>();
        require_send_and_sync::<SymbolKind>();
        require_send_and_sync::<OutputFormat>();
        require_send_and_sync::<ExtractedEntities>();
        require_send_and_sync::<TranslationResponse>();
    }

    /// The exposed package version should look like a dotted version string.
    #[test]
    fn test_version_available() {
        // An emptiness check would be pointless since the value comes from
        // Cargo.toml; instead verify it has the expected dotted shape.
        let has_dot_separator = VERSION.chars().any(|ch| ch == '.');
        assert!(has_dot_separator, "VERSION should contain a dot separator");
    }
}