sqry_nl/lib.rs
1//! # sqry-nl: Natural Language Translation Layer for sqry
2//!
3//! This crate provides translation from natural language queries to sqry commands.
4//! It uses a MiniLM-L6-v2-based intent classifier combined with regex-based entity
5//! extraction to produce validated, safe sqry commands.
6//!
7//! ## Architecture
8//!
9//! The translation pipeline consists of:
10//!
11//! 1. **Preprocessor** - Unicode normalization, homoglyph detection, input sanitization
12//! 2. **Entity Extractor** - Regex-based slot filling for symbols, languages, etc.
13//! 3. **Intent Classifier** - MiniLM-L6-v2 ONNX model for intent classification
14//! 4. **Template Assembler** - Maps (intent, entities) to sqry command templates
15//! 5. **Safety Validator** - Whitelist validation, metachar rejection, path guards
16//! 6. **Translation Cache** - LRU cache for repeated queries
17//!
18//! ## Example
19//!
20//! ```rust,ignore
21//! use sqry_nl::{Translator, TranslatorConfig, TranslationResponse};
22//!
23//! let config = TranslatorConfig::default();
24//! let translator = Translator::load(config)?;
25//!
//! match translator.translate("find authentication functions in rust") {
//!     TranslationResponse::Execute { command, confidence, .. } => {
//!         println!("Command: {} (confidence: {:.1}%)", command, confidence * 100.0);
//!         // Execute the command via sqry CLI
//!     }
//!     TranslationResponse::Confirm { command, prompt, .. } => {
//!         println!("{}", prompt);
//!         // Ask user for confirmation
//!     }
//!     TranslationResponse::Disambiguate { options, prompt } => {
//!         println!("{}", prompt);
//!         // Present options to user
//!     }
//!     TranslationResponse::Reject { reason, suggestions } => {
//!         eprintln!("Cannot translate: {}", reason);
//!         // Show suggestions
//!     }
//! }
44//! ```
45//!
46//! ## Safety
47//!
48//! All generated commands are validated against a strict whitelist of allowed
49//! command templates. The following are always rejected:
50//!
51//! - Shell metacharacters (`;`, `|`, `&`, `$`, etc.)
52//! - Environment variable expansion (`$HOME`, `${VAR}`)
53//! - Path traversal (`..`, absolute paths)
54//! - Write-mode operations (`--force`, `repair`, `prune`)
55//!
56//! ## Feature Flags
57//!
//! - `classifier` (default) - Enable the MiniLM-L6-v2 classifier. Requires ONNX Runtime.
//!   Disable for minimal builds that only need rule-based classification.
60
61// Public modules
62pub mod assembler;
63pub mod cache;
64#[cfg(feature = "classifier")]
65pub mod classifier;
66pub mod error;
67pub mod extractor;
68pub mod preprocess;
69pub mod translator;
70pub mod types;
71pub mod validator;
72
73// Re-exports for convenience
74pub use cache::{CacheConfig, CacheStats};
75pub use error::{NlError, NlResult};
76pub use translator::{Translator, TranslatorConfig};
77pub use types::{
78 DisambiguationOption, ExtractedEntities, Intent, OutputFormat, SymbolKind, TranslationResponse,
79 ValidationStatus,
80};
81
82/// Crate version for model compatibility checks
83pub const VERSION: &str = env!("CARGO_PKG_VERSION");
84
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile-time check that the re-exported core types are `Send` and `Sync`.
    ///
    /// The helper functions have empty bodies: the assertion is the trait bound
    /// itself, so a type that stops being thread-safe breaks the build of this
    /// test rather than failing at run time.
    #[test]
    fn test_types_are_send_sync() {
        fn require_send<T: Send>() {}
        fn require_sync<T: Sync>() {}

        // Expands to one `Send` check followed by one `Sync` check per type,
        // in the order the types are listed.
        macro_rules! check_thread_safe {
            ($($ty:ty),+ $(,)?) => {
                $(
                    require_send::<$ty>();
                    require_sync::<$ty>();
                )+
            };
        }

        check_thread_safe!(
            Intent,
            ValidationStatus,
            SymbolKind,
            OutputFormat,
            ExtractedEntities,
            TranslationResponse,
        );
    }

    /// Checks that the exported `VERSION` looks like a dotted version string.
    ///
    /// This is a loose format check: it only requires a `.` separator to be
    /// present somewhere in the string.
    #[test]
    fn test_version_available() {
        let has_separator = VERSION.contains('.');
        assert!(has_separator, "VERSION should contain a dot separator");
    }
}