jpx_engine/lib.rs
1//! # jpx-engine
2//!
3//! Protocol-agnostic JMESPath query engine with 400+ functions.
4//!
5//! This crate provides the core "brain" of jpx - everything you can do with JMESPath
6//! beyond basic compile and evaluate. It's designed to be transport-agnostic, allowing
7//! the CLI (`jpx`), MCP server (`jpx-server`), or any future REST/gRPC adapters to be
8//! thin wrappers over this engine.
9//!
10//! ## Features
11//!
12//! | Category | Description |
13//! |----------|-------------|
14//! | **Evaluation** | Single, batch, and string-based evaluation with validation |
15//! | **Introspection** | List functions, search by keyword, describe, find similar |
16//! | **Discovery** | Cross-server tool discovery with BM25 search indexing |
17//! | **Query Store** | Named queries for session-scoped reuse |
18//! | **JSON Utilities** | Format, diff, patch, merge, stats, paths, keys |
19//! | **Arrow** | Apache Arrow conversion (optional, via `arrow` feature) |
20//!
21//! ## Cargo Features
22//!
23//! - **`arrow`** - Enables Apache Arrow support for columnar data conversion.
24//! This adds the [`arrow`] module with functions to convert between Arrow
25//! RecordBatches and JSON Values. Used by the CLI for Parquet I/O.
26//!
27//! ## Quick Start
28//!
29//! ```rust
30//! use jpx_engine::JpxEngine;
31//! use serde_json::json;
32//!
33//! let engine = JpxEngine::new();
34//!
35//! // Evaluate a JMESPath expression
36//! let result = engine.evaluate("users[*].name", &json!({
37//! "users": [{"name": "alice"}, {"name": "bob"}]
38//! })).unwrap();
39//! assert_eq!(result, json!(["alice", "bob"]));
40//! ```
41//!
42//! ## Evaluation
43//!
44//! The engine supports multiple evaluation modes:
45//!
46//! ```rust
47//! use jpx_engine::JpxEngine;
48//! use serde_json::json;
49//!
50//! let engine = JpxEngine::new();
51//!
52//! // From parsed JSON
53//! let data = json!({"items": [1, 2, 3]});
54//! let result = engine.evaluate("length(items)", &data).unwrap();
55//! assert_eq!(result, json!(3));
56//!
57//! // From JSON string
58//! let result = engine.evaluate_str("length(@)", r#"[1, 2, 3]"#).unwrap();
59//! assert_eq!(result, json!(3));
60//!
61//! // Batch evaluation (multiple expressions, same input)
62//! let exprs = vec!["a".to_string(), "b".to_string()];
63//! let batch = engine.batch_evaluate(&exprs, &json!({"a": 1, "b": 2}));
64//! assert_eq!(batch.results[0].result, Some(json!(1)));
65//!
66//! // Validation without evaluation
67//! let valid = engine.validate("users[*].name");
68//! assert!(valid.valid);
69//! ```
70//!
71//! ## Function Introspection
72//!
73//! Discover and explore the 400+ available functions:
74//!
75//! ```rust
76//! use jpx_engine::JpxEngine;
77//!
78//! let engine = JpxEngine::new();
79//!
80//! // List all categories
81//! let categories = engine.categories();
82//! assert!(categories.contains(&"String".to_string()));
83//!
84//! // List functions in a category
85//! let string_funcs = engine.functions(Some("String"));
86//! assert!(string_funcs.iter().any(|f| f.name == "upper"));
87//!
88//! // Search by keyword (fuzzy matching, synonyms)
89//! let results = engine.search_functions("upper", 5);
90//! assert!(results.iter().any(|r| r.function.name == "upper"));
91//!
92//! // Get detailed function info
93//! let info = engine.describe_function("upper").unwrap();
94//! assert_eq!(info.category, "String");
95//!
96//! // Find similar functions
97//! let similar = engine.similar_functions("upper").unwrap();
98//! assert!(!similar.same_category.is_empty());
99//! ```
100//!
101//! ## JSON Utilities
102//!
103//! Beyond JMESPath evaluation, the engine provides JSON manipulation tools:
104//!
105//! ```rust
106//! use jpx_engine::JpxEngine;
107//!
108//! let engine = JpxEngine::new();
109//!
110//! // Pretty-print JSON
111//! let formatted = engine.format_json(r#"{"a":1}"#, 2).unwrap();
112//! assert!(formatted.contains('\n'));
113//!
114//! // Generate JSON Patch (RFC 6902)
115//! let patch = engine.diff(r#"{"a": 1}"#, r#"{"a": 2}"#).unwrap();
116//!
117//! // Apply JSON Patch
118//! let result = engine.patch(
119//! r#"{"a": 1}"#,
120//! r#"[{"op": "replace", "path": "/a", "value": 2}]"#
121//! ).unwrap();
122//!
123//! // Apply JSON Merge Patch (RFC 7396)
124//! let merged = engine.merge(
125//! r#"{"a": 1, "b": 2}"#,
126//! r#"{"b": 3, "c": 4}"#
127//! ).unwrap();
128//!
129//! // Analyze JSON structure
130//! let stats = engine.stats(r#"[1, 2, 3]"#).unwrap();
131//! assert_eq!(stats.root_type, "array");
132//! ```
133//!
134//! ## Query Store
135//!
136//! Store and reuse named queries within a session:
137//!
138//! ```rust
139//! use jpx_engine::JpxEngine;
140//! use serde_json::json;
141//!
142//! let engine = JpxEngine::new();
143//!
144//! // Define a reusable query
145//! engine.define_query(
146//! "active_users".to_string(),
147//! "users[?active].name".to_string(),
148//! Some("Get names of active users".to_string())
149//! ).unwrap();
150//!
151//! // Run it by name
152//! let data = json!({"users": [
153//! {"name": "alice", "active": true},
154//! {"name": "bob", "active": false}
155//! ]});
156//! let result = engine.run_query("active_users", &data).unwrap();
157//! assert_eq!(result, json!(["alice"]));
158//!
159//! // List all stored queries
160//! let queries = engine.list_queries().unwrap();
161//! assert_eq!(queries.len(), 1);
162//! ```
163//!
164//! ## Tool Discovery
165//!
166//! Register and search tools across multiple servers (for MCP integration):
167//!
168//! ```rust
169//! use jpx_engine::{JpxEngine, DiscoverySpec};
170//! use serde_json::json;
171//!
172//! let engine = JpxEngine::new();
173//!
174//! // Register a server's tools
175//! let spec: DiscoverySpec = serde_json::from_value(json!({
176//! "server": {"name": "my-server", "version": "1.0.0"},
177//! "tools": [
178//! {"name": "create_user", "description": "Create a new user", "tags": ["write"]}
179//! ]
180//! })).unwrap();
181//!
182//! let result = engine.register_discovery(spec, false).unwrap();
183//! assert!(result.ok);
184//!
185//! // Search across registered tools
186//! let tools = engine.query_tools("user", 10).unwrap();
187//! assert!(!tools.is_empty());
188//! ```
189//!
190//! ## Strict Mode
191//!
192//! For standard JMESPath compliance without extensions:
193//!
194//! ```rust
195//! use jpx_engine::JpxEngine;
196//! use serde_json::json;
197//!
198//! let engine = JpxEngine::strict();
199//! assert!(engine.is_strict());
200//!
201//! // Standard functions work
202//! let result = engine.evaluate("length(@)", &json!([1, 2, 3])).unwrap();
203//! assert_eq!(result, json!(3));
204//!
205//! // Extension functions are not available for evaluation
206//! // (but introspection still works for documentation purposes)
207//! ```
208//!
209//! ## Architecture
210//!
211//! ```text
212//! jmespath-extensions (400+ functions, registry)
213//! |
214//! jpx-engine (this crate - evaluation, search, discovery)
215//! |
216//! +----+----+
217//! | |
218//! jpx jpx-server (CLI and MCP transport)
219//! ```
220//!
221//! ## Thread Safety
222//!
223//! The engine uses interior mutability (`Arc<RwLock<...>>`) for the discovery
224//! registry and query store, making it safe to share across threads. The function
225//! registry is immutable after construction.
226
227mod bm25;
228mod discovery;
229mod error;
230mod query_store;
231mod types;
232
233#[cfg(feature = "arrow")]
234pub mod arrow;
235
236pub use bm25::{Bm25Index, DocInfo, IndexOptions, SearchResult as Bm25SearchResult, TermInfo};
237pub use discovery::{
238 CategoryInfo, CategorySummary, DiscoveryRegistry, DiscoverySpec, ExampleSpec, IndexStats,
239 ParamSpec, RegistrationResult, ReturnSpec, ServerInfo, ServerSummary, ToolQueryResult,
240 ToolSpec,
241};
242pub use error::{EngineError, Result};
243pub use query_store::{QueryStore, StoredQuery};
244pub use types::{
245 BatchEvaluateResult, BatchExpressionResult, EvalRequest, EvalResponse, ValidationResult,
246};
247
248use jmespath::Runtime;
249use jmespath_extensions::register_all;
250use jmespath_extensions::registry::{FunctionRegistry, expand_search_terms, lookup_synonyms};
251use serde::{Deserialize, Serialize};
252use serde_json::Value;
253use std::collections::HashMap;
254use std::sync::{Arc, RwLock};
255use strsim::jaro_winkler;
256
257// Re-export commonly used types from jmespath_extensions
258pub use jmespath_extensions::registry::{Category, FunctionInfo};
259
260/// Detailed information about a JMESPath function.
261///
262/// This struct provides a serializable representation of function metadata,
263/// suitable for API responses, documentation generation, and introspection tools.
264///
265/// # Example
266///
267/// ```rust
268/// use jpx_engine::JpxEngine;
269///
270/// let engine = JpxEngine::new();
271/// let info = engine.describe_function("upper").unwrap();
272///
273/// println!("Function: {}", info.name);
274/// println!("Category: {}", info.category);
275/// println!("Signature: {}", info.signature);
276/// println!("Example: {}", info.example);
277/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionDetail {
    /// Function name (e.g., "upper", "sum", "now")
    pub name: String,
    /// Category name (e.g., "String", "Math", "Datetime").
    ///
    /// When built through `From<&FunctionInfo>`, this is the `Debug`
    /// rendering of the function's [`Category`].
    pub category: String,
    /// Human-readable description of what the function does
    pub description: String,
    /// Function signature showing parameter types (e.g., "string -> string")
    pub signature: String,
    /// Example usage demonstrating the function
    pub example: String,
    /// Whether this is a standard JMESPath function (vs extension)
    pub is_standard: bool,
    /// JMESPath Enhancement Proposal number, if applicable.
    ///
    /// Left out of the serialized output entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub jep: Option<String>,
    /// Alternative names for this function.
    ///
    /// Left out of the serialized output when empty, and defaults to an
    /// empty list when the field is absent during deserialization.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub aliases: Vec<String>,
}
299
300impl From<&FunctionInfo> for FunctionDetail {
301 fn from(info: &FunctionInfo) -> Self {
302 Self {
303 name: info.name.to_string(),
304 category: format!("{:?}", info.category),
305 description: info.description.to_string(),
306 signature: info.signature.to_string(),
307 example: info.example.to_string(),
308 is_standard: info.is_standard,
309 jep: info.jep.map(|s| s.to_string()),
310 aliases: info.aliases.iter().map(|s| s.to_string()).collect(),
311 }
312 }
313}
314
315/// Result from searching for functions.
316///
317/// Contains the matched function along with information about how it matched
318/// the search query and its relevance score.
319///
320/// # Scoring
321///
322/// Match types and approximate scores:
323/// - `exact_name` (1000): Query exactly matches function name
324/// - `alias` (900): Query matches a function alias
325/// - `name_prefix` (800): Function name starts with query
326/// - `name_contains` (700): Function name contains query
327/// - `category` (600): Query matches category name
328/// - `description` (100-300): Query found in description
329/// - `fuzzy_name` (variable): Jaro-Winkler similarity > 0.8
330/// - `synonym` (300): Query synonym found in name/description
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// The matched function's details
    pub function: FunctionDetail,
    /// How the function matched (e.g., "exact_name", "description")
    pub match_type: String,
    /// Relevance score (higher = better match).
    ///
    /// `JpxEngine::search_functions` only returns results with a score
    /// greater than zero, ordered by this value descending and then by
    /// function name ascending.
    pub score: i32,
}
340
341/// Result from finding functions similar to a given function.
342///
343/// Groups similar functions by relationship type: same category,
344/// similar signature, or related concepts in descriptions.
345///
346/// # Example
347///
348/// ```rust
349/// use jpx_engine::JpxEngine;
350///
351/// let engine = JpxEngine::new();
352/// let similar = engine.similar_functions("upper").unwrap();
353///
354/// // Functions in the same category (String)
355/// for f in &similar.same_category {
356/// println!("Same category: {}", f.name);
357/// }
358///
359/// // Functions with similar signatures
360/// for f in &similar.similar_signature {
361/// println!("Similar signature: {}", f.name);
362/// }
363/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimilarFunctionsResult {
    // Each list below is left out of the serialized output when it is empty
    // and defaults to an empty list when absent during deserialization.
    /// Functions in the same category as the target
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub same_category: Vec<FunctionDetail>,
    /// Functions with similar parameter/return types
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub similar_signature: Vec<FunctionDetail>,
    /// Functions with overlapping description keywords
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub related_concepts: Vec<FunctionDetail>,
}
376
377/// Statistics about JSON data structure.
378///
379/// Provides insights into JSON data including type, size, depth, and
380/// detailed field analysis for arrays of objects.
381///
382/// # Example
383///
384/// ```rust
385/// use jpx_engine::JpxEngine;
386///
387/// let engine = JpxEngine::new();
388/// let stats = engine.stats(r#"{"users": [{"name": "alice"}, {"name": "bob"}]}"#).unwrap();
389///
390/// println!("Type: {}", stats.root_type); // "object"
391/// println!("Size: {}", stats.size_human); // "52 bytes"
392/// println!("Depth: {}", stats.depth); // 3
393/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatsResult {
    /// JSON type of the root value ("object", "array", "string", etc.)
    pub root_type: String,
    /// Size of the JSON string in bytes
    pub size_bytes: usize,
    /// Human-readable size (e.g., "1.5 KB", "2.3 MB")
    pub size_human: String,
    /// Maximum nesting depth (0 for primitives)
    pub depth: usize,
    // The optional fields below are left out of the serialized output
    // entirely when they are `None`.
    /// Number of items (arrays only)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub length: Option<usize>,
    /// Number of keys (objects only)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub key_count: Option<usize>,
    /// Field analysis (arrays of objects only)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fields: Option<Vec<FieldAnalysis>>,
    /// Count of each JSON type in array (arrays only), keyed by type name
    #[serde(skip_serializing_if = "Option::is_none")]
    pub type_distribution: Option<HashMap<String, usize>>,
}
417
418/// Analysis of a field across an array of objects.
419///
420/// Used by [`StatsResult`] to provide insights into consistent fields
421/// in arrays of objects, including type information and null counts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldAnalysis {
    /// Field name (key)
    pub name: String,
    /// Most common type for this field
    pub field_type: String,
    /// Number of objects where this field is null
    pub null_count: usize,
    /// Number of distinct values (omitted for high-cardinality fields).
    ///
    /// When `None`, the key is left out of the serialized output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub unique_count: Option<usize>,
}
434
435/// Information about a path in a JSON structure.
436///
437/// Used by [`JpxEngine::paths`] to enumerate all paths in a JSON document.
438///
439/// # Example
440///
441/// ```rust
442/// use jpx_engine::JpxEngine;
443///
444/// let engine = JpxEngine::new();
445/// let paths = engine.paths(r#"{"user": {"name": "alice"}}"#, true, false).unwrap();
446///
447/// for path in paths {
448/// println!("{}: {:?}", path.path, path.path_type);
449/// }
450/// // Output:
451/// // @: Some("object")
452/// // user: Some("object")
453/// // user.name: Some("string")
454/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PathInfo {
    /// Path in dot notation (e.g., "user.name", "items.0.id")
    pub path: String,
    /// JSON type at this path (if `include_types` was true).
    ///
    /// When `None`, the key is left out of the serialized output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path_type: Option<String>,
    /// Value at this path (if `include_values` was true, leaf nodes only).
    ///
    /// When `None`, the key is left out of the serialized output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub value: Option<Value>,
}
466
467/// The JMESPath query engine.
468///
469/// `JpxEngine` is the main entry point for all jpx functionality. It combines:
470///
471/// - **JMESPath runtime** with 400+ extension functions
472/// - **Function registry** for introspection and search
473/// - **Discovery registry** for cross-server tool indexing
474/// - **Query store** for named query management
475///
476/// # Construction
477///
478/// ```rust
479/// use jpx_engine::JpxEngine;
480///
481/// // Full engine with all extensions
482/// let engine = JpxEngine::new();
483///
484/// // Strict mode (standard JMESPath only)
485/// let strict_engine = JpxEngine::strict();
486///
487/// // Or using Default
488/// let default_engine = JpxEngine::default();
489/// ```
490///
491/// # Thread Safety
492///
493/// The engine is designed to be shared across threads. The discovery registry
494/// and query store use `Arc<RwLock<...>>` for interior mutability, while the
495/// function registry is immutable after construction.
496///
497/// ```rust
498/// use jpx_engine::JpxEngine;
499/// use std::sync::Arc;
500///
501/// let engine = Arc::new(JpxEngine::new());
502///
503/// // Clone the Arc to share across threads
504/// let engine_clone = Arc::clone(&engine);
505/// std::thread::spawn(move || {
506/// let result = engine_clone.evaluate("length(@)", &serde_json::json!([1, 2, 3]));
507/// });
508/// ```
pub struct JpxEngine {
    /// JMESPath runtime used to compile and evaluate expressions.
    ///
    /// Built-in functions are always registered; extension functions are
    /// registered only when the engine is not in strict mode.
    runtime: Runtime,
    /// Function registry for introspection.
    ///
    /// Populated with every function regardless of strict mode.
    registry: FunctionRegistry,
    /// Discovery registry for cross-server tool search
    discovery: Arc<RwLock<DiscoveryRegistry>>,
    /// Query store for named queries
    queries: Arc<RwLock<QueryStore>>,
    /// Whether to use strict mode (standard JMESPath only)
    strict: bool,
}
521
522impl JpxEngine {
523 /// Creates a new engine with all extension functions enabled.
524 ///
525 /// This is the standard way to create an engine with full functionality,
526 /// including all 400+ extension functions.
527 ///
528 /// # Example
529 ///
530 /// ```rust
531 /// use jpx_engine::JpxEngine;
532 /// use serde_json::json;
533 ///
534 /// let engine = JpxEngine::new();
535 ///
536 /// // Standard JMESPath works
537 /// let result = engine.evaluate("name", &json!({"name": "alice"})).unwrap();
538 /// assert_eq!(result, json!("alice"));
539 ///
540 /// // Extension functions also work
541 /// let result = engine.evaluate("upper(name)", &json!({"name": "alice"})).unwrap();
542 /// assert_eq!(result, json!("ALICE"));
543 /// ```
544 pub fn new() -> Self {
545 Self::with_options(false)
546 }
547
548 /// Creates a new engine with configurable strict mode.
549 ///
550 /// # Arguments
551 ///
552 /// * `strict` - If `true`, only standard JMESPath functions are available
553 /// for evaluation. Introspection features still show all functions.
554 ///
555 /// # Example
556 ///
557 /// ```rust
558 /// use jpx_engine::JpxEngine;
559 ///
560 /// // Create engine with extensions
561 /// let full_engine = JpxEngine::with_options(false);
562 ///
563 /// // Create strict engine (standard JMESPath only)
564 /// let strict_engine = JpxEngine::with_options(true);
565 /// assert!(strict_engine.is_strict());
566 /// ```
567 pub fn with_options(strict: bool) -> Self {
568 let mut runtime = Runtime::new();
569 runtime.register_builtin_functions();
570 if !strict {
571 register_all(&mut runtime);
572 }
573
574 let mut registry = FunctionRegistry::new();
575 registry.register_all();
576
577 Self {
578 runtime,
579 registry,
580 discovery: Arc::new(RwLock::new(DiscoveryRegistry::new())),
581 queries: Arc::new(RwLock::new(QueryStore::new())),
582 strict,
583 }
584 }
585
586 /// Creates a new engine in strict mode (standard JMESPath only).
587 ///
588 /// Equivalent to `JpxEngine::with_options(true)`.
589 ///
590 /// # Example
591 ///
592 /// ```rust
593 /// use jpx_engine::JpxEngine;
594 /// use serde_json::json;
595 ///
596 /// let engine = JpxEngine::strict();
597 ///
598 /// // Standard functions work
599 /// let result = engine.evaluate("length(@)", &json!([1, 2, 3])).unwrap();
600 /// assert_eq!(result, json!(3));
601 /// ```
602 pub fn strict() -> Self {
603 Self::with_options(true)
604 }
605
606 /// Returns `true` if the engine is in strict mode.
607 ///
608 /// In strict mode, only standard JMESPath functions are available for
609 /// evaluation. Extension functions will cause evaluation errors.
610 pub fn is_strict(&self) -> bool {
611 self.strict
612 }
613
614 /// Returns a reference to the underlying JMESPath runtime.
615 ///
616 /// This provides access to the low-level runtime for advanced use cases.
617 pub fn runtime(&self) -> &Runtime {
618 &self.runtime
619 }
620
621 /// Returns a reference to the function registry.
622 ///
623 /// The registry contains metadata about all available functions and can
624 /// be used for custom introspection beyond what the engine methods provide.
625 pub fn registry(&self) -> &FunctionRegistry {
626 &self.registry
627 }
628
629 /// Returns a reference to the discovery registry.
630 ///
631 /// The discovery registry manages cross-server tool indexing and search.
632 /// Access is through `Arc<RwLock<...>>` for thread-safe mutation.
633 pub fn discovery(&self) -> &Arc<RwLock<DiscoveryRegistry>> {
634 &self.discovery
635 }
636
637 /// Returns a reference to the query store.
638 ///
639 /// The query store manages named queries for the session.
640 /// Access is through `Arc<RwLock<...>>` for thread-safe mutation.
641 pub fn queries(&self) -> &Arc<RwLock<QueryStore>> {
642 &self.queries
643 }
644
645 // =========================================================================
646 // Evaluation methods
647 // =========================================================================
648
649 /// Evaluates a JMESPath expression against JSON input.
650 ///
651 /// This is the primary method for running JMESPath queries. The expression
652 /// is compiled and executed against the provided JSON value.
653 ///
654 /// # Arguments
655 ///
656 /// * `expression` - A JMESPath expression string
657 /// * `input` - JSON data to query
658 ///
659 /// # Errors
660 ///
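    /// Returns [`EngineError::InvalidExpression`] if the expression fails to compile
    /// (e.g., syntax errors), [`EngineError::InvalidJson`] if the input cannot be
    /// converted into the runtime's variable representation, or
    /// [`EngineError::EvaluationFailed`] if evaluation or conversion of the result
    /// fails (e.g., calling an undefined function in strict mode).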
664 ///
665 /// # Example
666 ///
667 /// ```rust
668 /// use jpx_engine::JpxEngine;
669 /// use serde_json::json;
670 ///
671 /// let engine = JpxEngine::new();
672 ///
673 /// // Simple field access
674 /// let result = engine.evaluate("name", &json!({"name": "alice"})).unwrap();
675 /// assert_eq!(result, json!("alice"));
676 ///
677 /// // Array projection with function
678 /// let result = engine.evaluate("users[*].name | sort(@)", &json!({
679 /// "users": [{"name": "charlie"}, {"name": "alice"}, {"name": "bob"}]
680 /// })).unwrap();
681 /// assert_eq!(result, json!(["alice", "bob", "charlie"]));
682 /// ```
683 pub fn evaluate(&self, expression: &str, input: &Value) -> Result<Value> {
684 let expr = self
685 .runtime
686 .compile(expression)
687 .map_err(|e| EngineError::InvalidExpression(e.to_string()))?;
688
689 // Convert input Value to Variable for jmespath
690 let var = jmespath::Variable::from_json(&input.to_string())
691 .map_err(|e| EngineError::InvalidJson(e.to_string()))?;
692
693 let result = expr
694 .search(&var)
695 .map_err(|e| EngineError::EvaluationFailed(e.to_string()))?;
696
697 // Convert Rcvar to Value
698 let value: Value = serde_json::to_value(result.as_ref())
699 .map_err(|e| EngineError::EvaluationFailed(e.to_string()))?;
700
701 Ok(value)
702 }
703
704 /// Evaluates a JMESPath expression against a JSON string.
705 ///
706 /// Convenience method that parses the JSON string before evaluation.
707 ///
708 /// # Errors
709 ///
710 /// Returns [`EngineError::InvalidJson`] if the input is not valid JSON,
711 /// or evaluation errors as with [`evaluate`](Self::evaluate).
712 ///
713 /// # Example
714 ///
715 /// ```rust
716 /// use jpx_engine::JpxEngine;
717 /// use serde_json::json;
718 ///
719 /// let engine = JpxEngine::new();
720 /// let result = engine.evaluate_str("length(@)", r#"[1, 2, 3, 4, 5]"#).unwrap();
721 /// assert_eq!(result, json!(5));
722 /// ```
723 pub fn evaluate_str(&self, expression: &str, input: &str) -> Result<Value> {
724 let json: Value =
725 serde_json::from_str(input).map_err(|e| EngineError::InvalidJson(e.to_string()))?;
726 self.evaluate(expression, &json)
727 }
728
729 /// Evaluates multiple expressions against the same input.
730 ///
731 /// Useful for extracting multiple values from a document in one call.
732 /// Each expression is evaluated independently; failures don't affect other expressions.
733 ///
734 /// # Example
735 ///
736 /// ```rust
737 /// use jpx_engine::JpxEngine;
738 /// use serde_json::json;
739 ///
740 /// let engine = JpxEngine::new();
741 /// let input = json!({"name": "alice", "age": 30, "active": true});
742 ///
743 /// let exprs = vec![
744 /// "name".to_string(),
745 /// "age".to_string(),
746 /// "missing".to_string(), // Returns null, not an error
747 /// ];
748 ///
749 /// let results = engine.batch_evaluate(&exprs, &input);
750 /// assert_eq!(results.results[0].result, Some(json!("alice")));
751 /// assert_eq!(results.results[1].result, Some(json!(30)));
752 /// assert_eq!(results.results[2].result, Some(json!(null)));
753 /// ```
754 pub fn batch_evaluate(&self, expressions: &[String], input: &Value) -> BatchEvaluateResult {
755 let results = expressions
756 .iter()
757 .map(|expr| match self.evaluate(expr, input) {
758 Ok(result) => BatchExpressionResult {
759 expression: expr.clone(),
760 result: Some(result),
761 error: None,
762 },
763 Err(e) => BatchExpressionResult {
764 expression: expr.clone(),
765 result: None,
766 error: Some(e.to_string()),
767 },
768 })
769 .collect();
770
771 BatchEvaluateResult { results }
772 }
773
774 /// Validates a JMESPath expression without evaluating it.
775 ///
776 /// Checks if an expression has valid syntax without needing input data.
777 /// Useful for validating user-provided expressions before storing them.
778 ///
779 /// # Example
780 ///
781 /// ```rust
782 /// use jpx_engine::JpxEngine;
783 ///
784 /// let engine = JpxEngine::new();
785 ///
786 /// // Valid expression
787 /// let result = engine.validate("users[*].name | sort(@)");
788 /// assert!(result.valid);
789 /// assert!(result.error.is_none());
790 ///
791 /// // Invalid expression (unclosed bracket)
792 /// let result = engine.validate("users[*.name");
793 /// assert!(!result.valid);
794 /// assert!(result.error.is_some());
795 /// ```
796 pub fn validate(&self, expression: &str) -> ValidationResult {
797 match jmespath::compile(expression) {
798 Ok(_) => ValidationResult {
799 valid: true,
800 error: None,
801 },
802 Err(e) => ValidationResult {
803 valid: false,
804 error: Some(e.to_string()),
805 },
806 }
807 }
808
809 // =========================================================================
810 // Introspection methods
811 // =========================================================================
812
813 /// Lists all available function categories.
814 ///
815 /// Returns category names like "String", "Math", "Datetime", etc.
816 ///
817 /// # Example
818 ///
819 /// ```rust
820 /// use jpx_engine::JpxEngine;
821 ///
822 /// let engine = JpxEngine::new();
823 /// let categories = engine.categories();
824 ///
825 /// assert!(categories.contains(&"String".to_string()));
826 /// assert!(categories.contains(&"Math".to_string()));
827 /// assert!(categories.contains(&"Array".to_string()));
828 /// ```
829 pub fn categories(&self) -> Vec<String> {
830 Category::all().iter().map(|c| format!("{:?}", c)).collect()
831 }
832
833 /// Lists functions, optionally filtered by category.
834 ///
835 /// # Arguments
836 ///
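    /// * `category` - Optional category name to filter by (case-insensitive). If the
    ///   name does not resolve to a known category, no filter is applied and all
    ///   functions are returned.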
838 ///
839 /// # Example
840 ///
841 /// ```rust
842 /// use jpx_engine::JpxEngine;
843 ///
844 /// let engine = JpxEngine::new();
845 ///
846 /// // All functions
847 /// let all = engine.functions(None);
848 /// assert!(all.len() > 100);
849 ///
850 /// // Just string functions
851 /// let string_funcs = engine.functions(Some("String"));
852 /// assert!(string_funcs.iter().all(|f| f.category == "String"));
853 /// ```
854 pub fn functions(&self, category: Option<&str>) -> Vec<FunctionDetail> {
855 match category.and_then(parse_category) {
856 Some(cat) => self
857 .registry
858 .functions_in_category(cat)
859 .map(FunctionDetail::from)
860 .collect(),
861 None => self
862 .registry
863 .functions()
864 .map(FunctionDetail::from)
865 .collect(),
866 }
867 }
868
869 /// Gets detailed information about a function by name or alias.
870 ///
871 /// # Arguments
872 ///
873 /// * `name` - Function name or alias (e.g., "upper", "md5", "len")
874 ///
875 /// # Returns
876 ///
877 /// `Some(FunctionDetail)` if found, `None` if no matching function exists.
878 ///
879 /// # Example
880 ///
881 /// ```rust
882 /// use jpx_engine::JpxEngine;
883 ///
884 /// let engine = JpxEngine::new();
885 ///
886 /// let info = engine.describe_function("upper").unwrap();
887 /// assert_eq!(info.category, "String");
888 /// println!("Signature: {}", info.signature);
889 /// println!("Example: {}", info.example);
890 ///
891 /// // Also works with aliases
892 /// let info = engine.describe_function("len"); // alias for "length"
893 /// ```
894 pub fn describe_function(&self, name: &str) -> Option<FunctionDetail> {
895 self.registry.get_function(name).map(FunctionDetail::from)
896 }
897
898 /// Searches for functions matching a query string.
899 ///
900 /// Uses fuzzy matching, synonyms, and searches across names, descriptions,
901 /// categories, and signatures. Results are ranked by relevance.
902 ///
903 /// # Arguments
904 ///
905 /// * `query` - Search term (e.g., "hash", "string manipulation", "date")
906 /// * `limit` - Maximum number of results to return
907 ///
908 /// # Example
909 ///
910 /// ```rust
911 /// use jpx_engine::JpxEngine;
912 ///
913 /// let engine = JpxEngine::new();
914 ///
915 /// // Search by concept
916 /// let results = engine.search_functions("hash", 10);
917 /// assert!(results.iter().any(|r| r.function.name == "md5"));
918 /// assert!(results.iter().any(|r| r.function.name == "sha256"));
919 ///
920 /// // Results are ranked by relevance
921 /// for result in &results {
922 /// println!("{}: {} (score: {})",
923 /// result.function.name,
924 /// result.match_type,
925 /// result.score
926 /// );
927 /// }
928 /// ```
929 pub fn search_functions(&self, query: &str, limit: usize) -> Vec<SearchResult> {
930 let query_lower = query.to_lowercase();
931
932 // Expand query terms using synonyms
933 let expanded_terms = expand_search_terms(&query_lower);
934
935 let all_functions: Vec<_> = self.registry.functions().collect();
936 let mut results: Vec<SearchResult> = Vec::new();
937
938 for info in &all_functions {
939 let name_lower = info.name.to_lowercase();
940 let desc_lower = info.description.to_lowercase();
941 let category_lower = format!("{:?}", info.category).to_lowercase();
942 let signature_lower = info.signature.to_lowercase();
943 let aliases_lower: Vec<String> = info
944 .aliases
945 .iter()
946 .map(|a: &&str| a.to_lowercase())
947 .collect();
948
949 // Calculate match score and type
950 let (score, match_type) = calculate_match_score(
951 &query_lower,
952 &expanded_terms,
953 &MatchContext {
954 name: &name_lower,
955 aliases: &aliases_lower,
956 category: &category_lower,
957 description: &desc_lower,
958 signature: &signature_lower,
959 },
960 );
961
962 if score > 0 {
963 results.push(SearchResult {
964 function: FunctionDetail::from(*info),
965 match_type,
966 score,
967 });
968 }
969 }
970
971 // Sort by score descending, then by name
972 results.sort_by(|a, b| {
973 b.score
974 .cmp(&a.score)
975 .then_with(|| a.function.name.cmp(&b.function.name))
976 });
977
978 results.truncate(limit);
979 results
980 }
981
982 /// Finds functions similar to a given function.
983 ///
984 /// Returns functions grouped by relationship type:
985 /// - Same category (e.g., other string functions if input is "upper")
986 /// - Similar signature (same parameter/return types)
987 /// - Related concepts (overlapping description keywords)
988 ///
989 /// # Arguments
990 ///
991 /// * `name` - Name of the function to find similar functions for
992 ///
993 /// # Returns
994 ///
995 /// `Some(SimilarFunctionsResult)` if the function exists, `None` otherwise.
996 ///
997 /// # Example
998 ///
999 /// ```rust
1000 /// use jpx_engine::JpxEngine;
1001 ///
1002 /// let engine = JpxEngine::new();
1003 ///
1004 /// let similar = engine.similar_functions("upper").unwrap();
1005 ///
1006 /// // Other string functions
1007 /// println!("Same category:");
1008 /// for f in &similar.same_category {
1009 /// println!(" - {}", f.name);
1010 /// }
1011 /// ```
1012 pub fn similar_functions(&self, name: &str) -> Option<SimilarFunctionsResult> {
1013 let info = self.registry.get_function(name)?;
1014 let all_functions: Vec<_> = self.registry.functions().collect();
1015
1016 // Same category
1017 let same_category: Vec<FunctionDetail> = all_functions
1018 .iter()
1019 .filter(|f| f.category == info.category && f.name != info.name)
1020 .take(5)
1021 .map(|f| FunctionDetail::from(*f))
1022 .collect();
1023
1024 // Similar signature (same arity)
1025 let this_arity = count_params(info.signature);
1026 let similar_signature: Vec<FunctionDetail> = all_functions
1027 .iter()
1028 .filter(|f| {
1029 f.name != info.name
1030 && f.category != info.category
1031 && count_params(f.signature) == this_arity
1032 })
1033 .take(5)
1034 .map(|f| FunctionDetail::from(*f))
1035 .collect();
1036
1037 // Related concepts (description keyword overlap)
1038 let keywords = extract_keywords(info.description);
1039 let mut concept_scores: Vec<(&FunctionInfo, usize)> = all_functions
1040 .iter()
1041 .filter(|f| f.name != info.name)
1042 .map(|f| {
1043 let f_keywords = extract_keywords(f.description);
1044 let overlap = keywords.iter().filter(|k| f_keywords.contains(*k)).count();
1045 (*f, overlap)
1046 })
1047 .filter(|(_, score)| *score > 0)
1048 .collect();
1049
1050 concept_scores.sort_by(|a, b| b.1.cmp(&a.1));
1051
1052 let related_concepts: Vec<FunctionDetail> = concept_scores
1053 .into_iter()
1054 .take(5)
1055 .map(|(f, _)| FunctionDetail::from(f))
1056 .collect();
1057
1058 Some(SimilarFunctionsResult {
1059 same_category,
1060 similar_signature,
1061 related_concepts,
1062 })
1063 }
1064
1065 // =========================================================================
1066 // JSON utility methods
1067 // =========================================================================
1068
1069 /// Format JSON with indentation.
1070 pub fn format_json(&self, input: &str, indent: usize) -> Result<String> {
1071 let value: Value =
1072 serde_json::from_str(input).map_err(|e| EngineError::InvalidJson(e.to_string()))?;
1073
1074 if indent == 0 {
1075 serde_json::to_string(&value).map_err(|e| EngineError::Internal(e.to_string()))
1076 } else {
1077 let indent_bytes = vec![b' '; indent];
1078 let formatter = serde_json::ser::PrettyFormatter::with_indent(&indent_bytes);
1079 let mut buf = Vec::new();
1080 let mut ser = serde_json::Serializer::with_formatter(&mut buf, formatter);
1081 value
1082 .serialize(&mut ser)
1083 .map_err(|e| EngineError::Internal(e.to_string()))?;
1084 String::from_utf8(buf).map_err(|e| EngineError::Internal(e.to_string()))
1085 }
1086 }
1087
1088 /// Generate a JSON Patch (RFC 6902) from source to target.
1089 pub fn diff(&self, source: &str, target: &str) -> Result<Value> {
1090 let source_val: Value =
1091 serde_json::from_str(source).map_err(|e| EngineError::InvalidJson(e.to_string()))?;
1092 let target_val: Value =
1093 serde_json::from_str(target).map_err(|e| EngineError::InvalidJson(e.to_string()))?;
1094
1095 let patch = json_patch::diff(&source_val, &target_val);
1096 serde_json::to_value(&patch).map_err(|e| EngineError::Internal(e.to_string()))
1097 }
1098
1099 /// Apply a JSON Patch (RFC 6902) to a document.
1100 pub fn patch(&self, input: &str, patch: &str) -> Result<Value> {
1101 let mut doc: Value =
1102 serde_json::from_str(input).map_err(|e| EngineError::InvalidJson(e.to_string()))?;
1103 let patch: json_patch::Patch =
1104 serde_json::from_str(patch).map_err(|e| EngineError::InvalidJson(e.to_string()))?;
1105
1106 json_patch::patch(&mut doc, &patch)
1107 .map_err(|e| EngineError::EvaluationFailed(e.to_string()))?;
1108
1109 Ok(doc)
1110 }
1111
1112 /// Apply a JSON Merge Patch (RFC 7396) to a document.
1113 pub fn merge(&self, input: &str, patch: &str) -> Result<Value> {
1114 let mut doc: Value =
1115 serde_json::from_str(input).map_err(|e| EngineError::InvalidJson(e.to_string()))?;
1116 let patch_val: Value =
1117 serde_json::from_str(patch).map_err(|e| EngineError::InvalidJson(e.to_string()))?;
1118
1119 json_patch::merge(&mut doc, &patch_val);
1120 Ok(doc)
1121 }
1122
1123 /// Extract keys from a JSON object.
1124 pub fn keys(&self, input: &str, recursive: bool) -> Result<Vec<String>> {
1125 let value: Value =
1126 serde_json::from_str(input).map_err(|e| EngineError::InvalidJson(e.to_string()))?;
1127
1128 let mut keys = Vec::new();
1129 if recursive {
1130 extract_keys_recursive(&value, "", &mut keys);
1131 } else if let Value::Object(map) = &value {
1132 keys = map.keys().cloned().collect();
1133 keys.sort();
1134 }
1135 Ok(keys)
1136 }
1137
1138 /// Extract all paths from JSON data.
1139 pub fn paths(
1140 &self,
1141 input: &str,
1142 include_types: bool,
1143 include_values: bool,
1144 ) -> Result<Vec<PathInfo>> {
1145 let value: Value =
1146 serde_json::from_str(input).map_err(|e| EngineError::InvalidJson(e.to_string()))?;
1147
1148 let mut paths = Vec::new();
1149 extract_paths(&value, "", include_types, include_values, &mut paths);
1150 Ok(paths)
1151 }
1152
1153 /// Analyze JSON data and return statistics.
1154 pub fn stats(&self, input: &str) -> Result<StatsResult> {
1155 let value: Value =
1156 serde_json::from_str(input).map_err(|e| EngineError::InvalidJson(e.to_string()))?;
1157
1158 let size_bytes = input.len();
1159 let depth = calculate_depth(&value);
1160
1161 let (length, key_count, fields, type_distribution) = match &value {
1162 Value::Array(arr) => {
1163 let type_dist = calculate_type_distribution(arr);
1164 let field_analysis = if arr.iter().all(|v| v.is_object()) {
1165 Some(analyze_array_fields(arr))
1166 } else {
1167 None
1168 };
1169 (Some(arr.len()), None, field_analysis, Some(type_dist))
1170 }
1171 Value::Object(map) => (None, Some(map.len()), None, None),
1172 _ => (None, None, None, None),
1173 };
1174
1175 Ok(StatsResult {
1176 root_type: json_type_name(&value).to_string(),
1177 size_bytes,
1178 size_human: format_size(size_bytes),
1179 depth,
1180 length,
1181 key_count,
1182 fields,
1183 type_distribution,
1184 })
1185 }
1186
1187 // =========================================================================
1188 // Query store methods
1189 // =========================================================================
1190
1191 /// Define (store) a named query.
1192 pub fn define_query(
1193 &self,
1194 name: String,
1195 expression: String,
1196 description: Option<String>,
1197 ) -> Result<Option<StoredQuery>> {
1198 // Validate expression first
1199 let validation = self.validate(&expression);
1200 if !validation.valid {
1201 return Err(EngineError::InvalidExpression(
1202 validation
1203 .error
1204 .unwrap_or_else(|| "Invalid expression".to_string()),
1205 ));
1206 }
1207
1208 let query = StoredQuery {
1209 name,
1210 expression,
1211 description,
1212 };
1213
1214 self.queries
1215 .write()
1216 .map_err(|e| EngineError::Internal(e.to_string()))?
1217 .define(query)
1218 .pipe(Ok)
1219 }
1220
1221 /// Get a stored query by name.
1222 pub fn get_query(&self, name: &str) -> Result<Option<StoredQuery>> {
1223 Ok(self
1224 .queries
1225 .read()
1226 .map_err(|e| EngineError::Internal(e.to_string()))?
1227 .get(name)
1228 .cloned())
1229 }
1230
1231 /// Delete a stored query.
1232 pub fn delete_query(&self, name: &str) -> Result<Option<StoredQuery>> {
1233 Ok(self
1234 .queries
1235 .write()
1236 .map_err(|e| EngineError::Internal(e.to_string()))?
1237 .delete(name))
1238 }
1239
1240 /// List all stored queries.
1241 pub fn list_queries(&self) -> Result<Vec<StoredQuery>> {
1242 Ok(self
1243 .queries
1244 .read()
1245 .map_err(|e| EngineError::Internal(e.to_string()))?
1246 .list()
1247 .into_iter()
1248 .cloned()
1249 .collect())
1250 }
1251
1252 /// Run a stored query.
1253 pub fn run_query(&self, name: &str, input: &Value) -> Result<Value> {
1254 let query = self
1255 .get_query(name)?
1256 .ok_or_else(|| EngineError::QueryNotFound(name.to_string()))?;
1257
1258 self.evaluate(&query.expression, input)
1259 }
1260
1261 // =========================================================================
1262 // Discovery methods
1263 // =========================================================================
1264
1265 /// Register a discovery spec.
1266 pub fn register_discovery(
1267 &self,
1268 spec: DiscoverySpec,
1269 replace: bool,
1270 ) -> Result<RegistrationResult> {
1271 Ok(self
1272 .discovery
1273 .write()
1274 .map_err(|e| EngineError::Internal(e.to_string()))?
1275 .register(spec, replace))
1276 }
1277
1278 /// Unregister a server from discovery.
1279 pub fn unregister_discovery(&self, server_name: &str) -> Result<bool> {
1280 Ok(self
1281 .discovery
1282 .write()
1283 .map_err(|e| EngineError::Internal(e.to_string()))?
1284 .unregister(server_name))
1285 }
1286
1287 /// Query tools across registered servers.
1288 pub fn query_tools(&self, query: &str, top_k: usize) -> Result<Vec<ToolQueryResult>> {
1289 Ok(self
1290 .discovery
1291 .read()
1292 .map_err(|e| EngineError::Internal(e.to_string()))?
1293 .query(query, top_k))
1294 }
1295
1296 /// Find tools similar to a given tool.
1297 pub fn similar_tools(&self, tool_id: &str, top_k: usize) -> Result<Vec<ToolQueryResult>> {
1298 Ok(self
1299 .discovery
1300 .read()
1301 .map_err(|e| EngineError::Internal(e.to_string()))?
1302 .similar(tool_id, top_k))
1303 }
1304
1305 /// List all registered discovery servers.
1306 pub fn list_discovery_servers(&self) -> Result<Vec<ServerSummary>> {
1307 Ok(self
1308 .discovery
1309 .read()
1310 .map_err(|e| EngineError::Internal(e.to_string()))?
1311 .list_servers())
1312 }
1313
1314 /// List discovery categories.
1315 pub fn list_discovery_categories(&self) -> Result<HashMap<String, CategorySummary>> {
1316 Ok(self
1317 .discovery
1318 .read()
1319 .map_err(|e| EngineError::Internal(e.to_string()))?
1320 .list_categories())
1321 }
1322
1323 /// Get discovery index stats.
1324 pub fn discovery_index_stats(&self) -> Result<Option<IndexStats>> {
1325 Ok(self
1326 .discovery
1327 .read()
1328 .map_err(|e| EngineError::Internal(e.to_string()))?
1329 .index_stats())
1330 }
1331
    /// Get the discovery schema.
    ///
    /// Delegates to `DiscoveryRegistry::get_schema`, an associated function,
    /// so the result does not depend on this engine's registered servers.
    pub fn get_discovery_schema(&self) -> Value {
        DiscoveryRegistry::get_schema()
    }
1336}
1337
/// `JpxEngine::default()` is equivalent to [`JpxEngine::new`].
impl Default for JpxEngine {
    /// Creates an engine by delegating to [`JpxEngine::new`].
    fn default() -> Self {
        Self::new()
    }
}
1343
1344// =============================================================================
1345// Helper functions
1346// =============================================================================
1347
/// Context for calculating match scores
///
/// Bundles the searchable text of a single function so it can be handed to
/// `calculate_match_score` as one argument. `search_functions` fills every
/// field with a lowercased copy of the corresponding function metadata, so
/// comparisons against a lowercased query are case-insensitive.
struct MatchContext<'a> {
    /// Function name.
    name: &'a str,
    /// Alternative names for the function.
    aliases: &'a [String],
    /// Category name (the `Debug` rendering of the category).
    category: &'a str,
    /// Function description text.
    description: &'a str,
    /// Function signature text.
    signature: &'a str,
}
1356
1357/// Calculate match score and type for a function
1358fn calculate_match_score(
1359 query: &str,
1360 expanded_terms: &[String],
1361 ctx: &MatchContext,
1362) -> (i32, String) {
1363 // Exact name match
1364 if ctx.name == query {
1365 return (1000, "exact_name".to_string());
1366 }
1367
1368 // Alias match
1369 if ctx.aliases.iter().any(|a| a == query) {
1370 return (900, "alias".to_string());
1371 }
1372
1373 // Name starts with query
1374 if ctx.name.starts_with(query) {
1375 return (800, "name_prefix".to_string());
1376 }
1377
1378 // Name contains query
1379 if ctx.name.contains(query) {
1380 return (700, "name_contains".to_string());
1381 }
1382
1383 // Category match
1384 if ctx.category == query {
1385 return (600, "category".to_string());
1386 }
1387
1388 // Check expanded terms in description/signature
1389 let mut desc_score = 0;
1390 let mut matched_terms = Vec::new();
1391
1392 for term in expanded_terms {
1393 if ctx.description.contains(term) || ctx.signature.contains(term) {
1394 desc_score += 100;
1395 matched_terms.push(term.clone());
1396 }
1397 }
1398
1399 if desc_score > 0 {
1400 return (
1401 desc_score,
1402 format!("description ({})", matched_terms.join(", ")),
1403 );
1404 }
1405
1406 // Fuzzy name match using Jaro-Winkler
1407 let similarity = jaro_winkler(query, ctx.name);
1408 if similarity > 0.8 {
1409 return ((similarity * 500.0) as i32, "fuzzy_name".to_string());
1410 }
1411
1412 // Check synonyms
1413 if let Some(synonyms) = lookup_synonyms(query) {
1414 for syn in synonyms {
1415 if ctx.name.contains(syn) || ctx.description.contains(syn) {
1416 return (300, format!("synonym ({})", syn));
1417 }
1418 }
1419 }
1420
1421 (0, String::new())
1422}
1423
1424/// Parse category string to Category enum
1425fn parse_category(name: &str) -> Option<Category> {
1426 Category::all()
1427 .iter()
1428 .find(|cat| format!("{:?}", cat).to_lowercase() == name.to_lowercase())
1429 .copied()
1430}
1431
/// Estimates the parameter count of a function signature.
///
/// The count is the number of comma-separated segments in `signature`, so a
/// signature without any comma (including an empty one) counts as 1.
fn count_params(signature: &str) -> usize {
    signature.split(',').count()
}
1436
/// Pulls the meaningful words out of a description for related-concept matching.
///
/// The description is split on every non-alphanumeric character. A word is
/// kept when it is longer than two bytes and its lowercased form is not a
/// stopword. Kept words are returned in their original casing and order,
/// duplicates included.
fn extract_keywords(description: &str) -> Vec<&str> {
    // Common English words plus documentation boilerplate ("returns",
    // "value", "function", ...) that carry no topical meaning.
    const STOPWORDS: &[&str] = &[
        "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "do", "does", "did",
        "will", "would", "could", "should", "may", "might", "must", "shall", "can",
        "to", "of", "in", "for", "on", "with", "at", "by", "from",
        "or", "and", "as", "if",
        "that", "which", "this", "these", "those",
        "it", "its", "such",
        "when", "where", "how",
        "all", "each", "every", "both", "few", "more", "most", "other", "some", "any",
        "no", "not", "only", "same", "than", "very", "just", "also",
        "into", "over", "after", "before", "between", "under",
        "again", "further", "then", "once", "here", "there", "why",
        "because", "while", "although", "though", "unless", "until", "whether",
        "returns", "return", "value", "values", "given", "input", "output",
        "function", "functions", "used", "using", "use",
    ];

    let mut keywords = Vec::new();
    for word in description.split(|c: char| !c.is_alphanumeric()) {
        if word.len() <= 2 {
            continue;
        }
        let lowered = word.to_lowercase();
        if !STOPWORDS.contains(&lowered.as_str()) {
            keywords.push(word);
        }
    }
    keywords
}
1546
1547/// Extract keys recursively from a JSON value
1548fn extract_keys_recursive(value: &Value, prefix: &str, keys: &mut Vec<String>) {
1549 match value {
1550 Value::Object(map) => {
1551 for (k, v) in map {
1552 let path = if prefix.is_empty() {
1553 k.clone()
1554 } else {
1555 format!("{}.{}", prefix, k)
1556 };
1557 keys.push(path.clone());
1558 extract_keys_recursive(v, &path, keys);
1559 }
1560 }
1561 Value::Array(arr) => {
1562 for (i, v) in arr.iter().enumerate() {
1563 let path = format!("{}.{}", prefix, i);
1564 extract_keys_recursive(v, &path, keys);
1565 }
1566 }
1567 _ => {}
1568 }
1569}
1570
1571/// Extract paths from a JSON value
1572fn extract_paths(
1573 value: &Value,
1574 prefix: &str,
1575 include_types: bool,
1576 include_values: bool,
1577 paths: &mut Vec<PathInfo>,
1578) {
1579 let current_path = if prefix.is_empty() {
1580 "@".to_string()
1581 } else {
1582 prefix.to_string()
1583 };
1584
1585 match value {
1586 Value::Object(map) => {
1587 paths.push(PathInfo {
1588 path: current_path.clone(),
1589 path_type: if include_types {
1590 Some("object".to_string())
1591 } else {
1592 None
1593 },
1594 value: None,
1595 });
1596 for (k, v) in map {
1597 let new_prefix = if prefix.is_empty() {
1598 k.clone()
1599 } else {
1600 format!("{}.{}", prefix, k)
1601 };
1602 extract_paths(v, &new_prefix, include_types, include_values, paths);
1603 }
1604 }
1605 Value::Array(arr) => {
1606 paths.push(PathInfo {
1607 path: current_path.clone(),
1608 path_type: if include_types {
1609 Some("array".to_string())
1610 } else {
1611 None
1612 },
1613 value: None,
1614 });
1615 for (i, v) in arr.iter().enumerate() {
1616 let new_prefix = format!("{}.{}", prefix, i);
1617 extract_paths(v, &new_prefix, include_types, include_values, paths);
1618 }
1619 }
1620 _ => {
1621 paths.push(PathInfo {
1622 path: current_path,
1623 path_type: if include_types {
1624 Some(json_type_name(value).to_string())
1625 } else {
1626 None
1627 },
1628 value: if include_values {
1629 Some(value.clone())
1630 } else {
1631 None
1632 },
1633 });
1634 }
1635 }
1636}
1637
1638/// Calculate the nesting depth of a JSON value
1639fn calculate_depth(value: &Value) -> usize {
1640 match value {
1641 Value::Object(map) => 1 + map.values().map(calculate_depth).max().unwrap_or(0),
1642 Value::Array(arr) => 1 + arr.iter().map(calculate_depth).max().unwrap_or(0),
1643 _ => 0,
1644 }
1645}
1646
1647/// Get the type name of a JSON value
1648fn json_type_name(value: &Value) -> &'static str {
1649 match value {
1650 Value::Null => "null",
1651 Value::Bool(_) => "boolean",
1652 Value::Number(_) => "number",
1653 Value::String(_) => "string",
1654 Value::Array(_) => "array",
1655 Value::Object(_) => "object",
1656 }
1657}
1658
1659/// Calculate type distribution in an array
1660fn calculate_type_distribution(arr: &[Value]) -> HashMap<String, usize> {
1661 let mut dist = HashMap::new();
1662 for item in arr {
1663 *dist.entry(json_type_name(item).to_string()).or_insert(0) += 1;
1664 }
1665 dist
1666}
1667
1668/// Analyze fields in an array of objects
1669fn analyze_array_fields(arr: &[Value]) -> Vec<FieldAnalysis> {
1670 let mut field_types: HashMap<String, HashMap<String, usize>> = HashMap::new();
1671 let mut field_null_counts: HashMap<String, usize> = HashMap::new();
1672 let mut field_values: HashMap<String, Vec<Value>> = HashMap::new();
1673
1674 for item in arr {
1675 if let Value::Object(map) = item {
1676 for (k, v) in map {
1677 let types = field_types.entry(k.clone()).or_default();
1678 *types.entry(json_type_name(v).to_string()).or_insert(0) += 1;
1679
1680 if v.is_null() {
1681 *field_null_counts.entry(k.clone()).or_insert(0) += 1;
1682 }
1683
1684 // Track unique values for low-cardinality detection
1685 let values = field_values.entry(k.clone()).or_default();
1686 if values.len() < 100 && !values.contains(v) {
1687 values.push(v.clone());
1688 }
1689 }
1690 }
1691 }
1692
1693 let mut fields: Vec<FieldAnalysis> = field_types
1694 .into_iter()
1695 .map(|(name, types)| {
1696 let predominant_type = types
1697 .into_iter()
1698 .max_by_key(|(_, count)| *count)
1699 .map(|(t, _)| t)
1700 .unwrap_or_else(|| "unknown".to_string());
1701
1702 let null_count = field_null_counts.get(&name).copied().unwrap_or(0);
1703 let unique_count = field_values.get(&name).map(|v| v.len());
1704
1705 FieldAnalysis {
1706 name,
1707 field_type: predominant_type,
1708 null_count,
1709 unique_count,
1710 }
1711 })
1712 .collect();
1713
1714 fields.sort_by(|a, b| a.name.cmp(&b.name));
1715 fields
1716}
1717
/// Renders a byte count as a short human-readable string.
///
/// Uses binary units (1 KB = 1024 bytes). Values of 1 KB and above are shown
/// with two decimals in the largest unit that fits, up to GB; smaller values
/// are shown as a whole number of bytes.
fn format_size(bytes: usize) -> String {
    // Largest unit first so the first threshold reached is the one used.
    const UNITS: [(usize, &str); 3] = [(1 << 30, "GB"), (1 << 20, "MB"), (1 << 10, "KB")];

    match UNITS.iter().find(|(threshold, _)| bytes >= *threshold) {
        Some((threshold, unit)) => {
            format!("{:.2} {}", bytes as f64 / *threshold as f64, unit)
        }
        None => format!("{} bytes", bytes),
    }
}
1734
/// Extension trait that lets any value be passed to a function in
/// method-call position: `value.pipe(f)` is the same as `f(value)`.
trait Pipe: Sized {
    /// Consumes `self`, applies `f` to it and returns the result.
    fn pipe<R, F>(self, f: F) -> R
    where
        F: FnOnce(Self) -> R,
    {
        f(self)
    }
}

/// Blanket implementation: every sized type gets `pipe`.
impl<T> Pipe for T {}
1743
/// Smoke tests covering each public area of `JpxEngine`: construction,
/// evaluation, introspection, JSON utilities, the query store and discovery.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// `new()` produces a non-strict engine.
    #[test]
    fn test_engine_creation() {
        let engine = JpxEngine::new();
        assert!(!engine.is_strict());
    }

    /// `strict()` produces a strict engine.
    #[test]
    fn test_engine_strict_mode() {
        let engine = JpxEngine::strict();
        assert!(engine.is_strict());
    }

    /// `default()` behaves like `new()`: non-strict.
    #[test]
    fn test_engine_default() {
        let engine = JpxEngine::default();
        assert!(!engine.is_strict());
    }

    /// A projection expression evaluates against parsed JSON.
    #[test]
    fn test_evaluate() {
        let engine = JpxEngine::new();
        let input = json!({"users": [{"name": "alice"}, {"name": "bob"}]});
        let result = engine.evaluate("users[*].name", &input).unwrap();
        assert_eq!(result, json!(["alice", "bob"]));
    }

    /// Evaluation also accepts the input as a JSON string.
    #[test]
    fn test_evaluate_str() {
        let engine = JpxEngine::new();
        let result = engine.evaluate_str("length(@)", r#"[1, 2, 3]"#).unwrap();
        assert_eq!(result, json!(3));
    }

    /// Batch evaluation returns one result per expression; a missing key
    /// evaluates to `null` rather than failing.
    #[test]
    fn test_batch_evaluate() {
        let engine = JpxEngine::new();
        let input = json!({"a": 1, "b": 2});
        let exprs = vec!["a".to_string(), "b".to_string(), "c".to_string()];
        let result = engine.batch_evaluate(&exprs, &input);

        assert_eq!(result.results.len(), 3);
        assert_eq!(result.results[0].result, Some(json!(1)));
        assert_eq!(result.results[1].result, Some(json!(2)));
        assert_eq!(result.results[2].result, Some(json!(null)));
    }

    /// Validation accepts a well-formed expression and reports an error for
    /// a malformed one.
    #[test]
    fn test_validate() {
        let engine = JpxEngine::new();

        let valid = engine.validate("users[*].name");
        assert!(valid.valid);
        assert!(valid.error.is_none());

        let invalid = engine.validate("users[*.name");
        assert!(!invalid.valid);
        assert!(invalid.error.is_some());
    }

    /// The category list is non-empty and includes "String".
    #[test]
    fn test_categories() {
        let engine = JpxEngine::new();
        let cats = engine.categories();
        assert!(!cats.is_empty());
        assert!(cats.iter().any(|c| c == "String"));
    }

    /// Functions can be listed in full or filtered by category.
    #[test]
    fn test_functions() {
        let engine = JpxEngine::new();

        // All functions
        let all = engine.functions(None);
        assert!(!all.is_empty());

        // Filtered by category
        let string_funcs = engine.functions(Some("String"));
        assert!(!string_funcs.is_empty());
        assert!(string_funcs.iter().all(|f| f.category == "String"));
    }

    /// Describing a known function returns its details; an unknown name
    /// returns `None`.
    #[test]
    fn test_describe_function() {
        let engine = JpxEngine::new();

        let info = engine.describe_function("upper").unwrap();
        assert_eq!(info.name, "upper");
        assert_eq!(info.category, "String");

        let missing = engine.describe_function("nonexistent");
        assert!(missing.is_none());
    }

    /// Searching for a common term yields at least one result.
    #[test]
    fn test_search_functions() {
        let engine = JpxEngine::new();

        let results = engine.search_functions("string", 10);
        assert!(!results.is_empty());
    }

    /// A known function has at least one sibling in its category.
    #[test]
    fn test_similar_functions() {
        let engine = JpxEngine::new();

        let result = engine.similar_functions("upper").unwrap();
        // Should have functions in same category
        assert!(!result.same_category.is_empty());
    }

    /// A non-zero indent produces multi-line output; zero produces a single line.
    #[test]
    fn test_format_json() {
        let engine = JpxEngine::new();

        let formatted = engine.format_json(r#"{"a":1,"b":2}"#, 2).unwrap();
        assert!(formatted.contains('\n'));

        let compact = engine.format_json(r#"{"a":1,"b":2}"#, 0).unwrap();
        assert!(!compact.contains('\n'));
    }

    /// Diffing two different documents yields a non-empty patch array.
    #[test]
    fn test_diff() {
        let engine = JpxEngine::new();

        let patch = engine.diff(r#"{"a": 1}"#, r#"{"a": 2}"#).unwrap();

        let patch_arr = patch.as_array().unwrap();
        assert!(!patch_arr.is_empty());
    }

    /// A `replace` operation updates the targeted value.
    #[test]
    fn test_patch() {
        let engine = JpxEngine::new();

        let result = engine
            .patch(
                r#"{"a": 1}"#,
                r#"[{"op": "replace", "path": "/a", "value": 2}]"#,
            )
            .unwrap();

        assert_eq!(result, json!({"a": 2}));
    }

    /// A merge patch overwrites existing keys and adds new ones.
    #[test]
    fn test_merge() {
        let engine = JpxEngine::new();

        let result = engine
            .merge(r#"{"a": 1, "b": 2}"#, r#"{"b": 3, "c": 4}"#)
            .unwrap();

        assert_eq!(result, json!({"a": 1, "b": 3, "c": 4}));
    }

    /// Non-recursive mode lists top-level keys; recursive mode includes
    /// dot-joined nested keys.
    #[test]
    fn test_keys() {
        let engine = JpxEngine::new();

        let keys = engine.keys(r#"{"a": 1, "b": {"c": 2}}"#, false).unwrap();
        assert_eq!(keys, vec!["a", "b"]);

        let recursive_keys = engine.keys(r#"{"a": 1, "b": {"c": 2}}"#, true).unwrap();
        assert!(recursive_keys.contains(&"b.c".to_string()));
    }

    /// Path extraction yields at least one path for a non-empty document.
    #[test]
    fn test_paths() {
        let engine = JpxEngine::new();

        let paths = engine.paths(r#"{"a": 1}"#, true, false).unwrap();
        assert!(!paths.is_empty());
    }

    /// Stats for a root array report its type and length.
    #[test]
    fn test_stats() {
        let engine = JpxEngine::new();

        let stats = engine.stats(r#"[1, 2, 3]"#).unwrap();
        assert_eq!(stats.root_type, "array");
        assert_eq!(stats.length, Some(3));
    }

    /// Full query-store lifecycle: define, get, run, list, delete.
    #[test]
    fn test_query_store() {
        let engine = JpxEngine::new();

        // Define a query
        engine
            .define_query("count".to_string(), "length(@)".to_string(), None)
            .unwrap();

        // Get it
        let query = engine.get_query("count").unwrap().unwrap();
        assert_eq!(query.expression, "length(@)");

        // Run it
        let result = engine.run_query("count", &json!([1, 2, 3])).unwrap();
        assert_eq!(result, json!(3));

        // List queries
        let queries = engine.list_queries().unwrap();
        assert_eq!(queries.len(), 1);

        // Delete it
        engine.delete_query("count").unwrap();
        assert!(engine.get_query("count").unwrap().is_none());
    }

    /// Full discovery lifecycle: register a spec, list servers, query tools,
    /// unregister.
    #[test]
    fn test_discovery() {
        let engine = JpxEngine::new();

        let spec: DiscoverySpec = serde_json::from_value(json!({
            "server": {"name": "test-server", "version": "1.0.0"},
            "tools": [
                {"name": "test_tool", "description": "A test tool", "tags": ["test"]}
            ]
        }))
        .unwrap();

        // Register
        let result = engine.register_discovery(spec, false).unwrap();
        assert!(result.ok);
        assert_eq!(result.tools_indexed, 1);

        // List servers
        let servers = engine.list_discovery_servers().unwrap();
        assert_eq!(servers.len(), 1);

        // Query tools
        let tools = engine.query_tools("test", 10).unwrap();
        assert!(!tools.is_empty());

        // Unregister
        assert!(engine.unregister_discovery("test-server").unwrap());
        assert!(engine.list_discovery_servers().unwrap().is_empty());
    }
}