hedl_json/
jsonpath.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! JSONPath query support for HEDL documents
19//!
20//! This module provides JSONPath query functionality for HEDL documents,
21//! allowing efficient extraction of specific data using standard JSONPath syntax.
22//!
23//! # Features
24//!
25//! - **Standard JSONPath Syntax**: Full support for JSONPath expressions
26//! - **Efficient Queries**: Optimized query execution with minimal allocations
27//! - **Type-Safe Results**: Returns strongly-typed query results
28//! - **Error Handling**: Comprehensive error reporting for invalid queries
29//!
30//! # Examples
31//!
32//! ```text
33//! use hedl_json::jsonpath::{query, QueryConfig};
34//! use hedl_core::Document;
35//!
36//! fn example() -> Result<(), Box<dyn std::error::Error>> {
37//!     let doc = hedl_core::parse("name: \"Alice\"\nage: 30".as_bytes())?;
38//!     let config = QueryConfig::default();
39//!
40//!     // Simple field access
41//!     let results = query(&doc, "$.name", &config)?;
42//!     assert_eq!(results.len(), 1);
43//!
44//!     // Array filtering
45//!     let results = query(&doc, "$.users[?(@.age > 25)]", &config)?;
46//!     Ok(())
47//! }
48//! ```
49
50use hedl_core::Document;
51use serde_json::Value as JsonValue;
52use serde_json_path::JsonPath;
53use std::str::FromStr;
54use thiserror::Error;
55
56use crate::{to_json_value, ToJsonConfig};
57
58/// Errors that can occur during JSONPath queries
59#[derive(Debug, Error, Clone, PartialEq)]
60pub enum QueryError {
61    /// Invalid JSONPath expression
62    #[error("Invalid JSONPath expression: {0}")]
63    InvalidExpression(String),
64
65    /// Document conversion error
66    #[error("Failed to convert HEDL document to JSON: {0}")]
67    ConversionError(String),
68
69    /// Query execution error
70    #[error("Query execution failed: {0}")]
71    ExecutionError(String),
72}
73
74/// Result type for JSONPath queries
75pub type QueryResult<T> = Result<T, QueryError>;
76
/// Options controlling how a document is converted to JSON and how many
/// matches a JSONPath query hands back.
#[derive(Clone, Debug)]
pub struct QueryConfig {
    /// When `true`, HEDL metadata is included in the JSON conversion.
    pub include_metadata: bool,

    /// When `true`, matrix lists are flattened to plain arrays.
    pub flatten_lists: bool,

    /// When `true`, children are included as nested arrays.
    pub include_children: bool,

    /// Upper bound on the number of results returned; `0` means unlimited.
    pub max_results: usize,
}
92
93impl Default for QueryConfig {
94    fn default() -> Self {
95        Self {
96            include_metadata: false,
97            flatten_lists: false,
98            include_children: true,
99            max_results: 0, // Unlimited
100        }
101    }
102}
103
104impl From<&QueryConfig> for ToJsonConfig {
105    fn from(config: &QueryConfig) -> Self {
106        ToJsonConfig {
107            include_metadata: config.include_metadata,
108            flatten_lists: config.flatten_lists,
109            include_children: config.include_children,
110        }
111    }
112}
113
114/// Query a HEDL document using JSONPath expression
115///
116/// # Arguments
117///
118/// * `doc` - The HEDL document to query
119/// * `path` - JSONPath expression (e.g., "$.users[*].name")
120/// * `config` - Query configuration
121///
122/// # Returns
123///
124/// Vector of matching JSON values
125///
126/// # Examples
127///
128/// ```text
129/// use hedl_json::jsonpath::{query, QueryConfig};
130/// use hedl_core::Document;
131///
132/// fn example() -> Result<(), Box<dyn std::error::Error>> {
133///     let doc = hedl_core::parse("users: [@User]\n  u1 Alice 30".as_bytes())?;
134///     let config = QueryConfig::default();
135///
136///     let results = query(&doc, "$.users", &config)?;
137///     assert!(!results.is_empty());
138///     Ok(())
139/// }
140/// ```
141pub fn query(doc: &Document, path: &str, config: &QueryConfig) -> QueryResult<Vec<JsonValue>> {
142    // Convert HEDL document to JSON
143    let json_config: ToJsonConfig = config.into();
144    let json_value = to_json_value(doc, &json_config).map_err(QueryError::ConversionError)?;
145
146    // Parse JSONPath expression
147    let json_path = JsonPath::from_str(path)
148        .map_err(|e| QueryError::InvalidExpression(format!("{}", e)))?;
149
150    // Execute query
151    let node_list = json_path.query(&json_value);
152
153    // Collect results with optional limit
154    let results: Vec<JsonValue> = if config.max_results > 0 {
155        node_list.into_iter().take(config.max_results).cloned().collect()
156    } else {
157        node_list.all().into_iter().cloned().collect()
158    };
159
160    Ok(results)
161}
162
163/// Query a HEDL document and return the first match
164///
165/// Convenience function for queries expected to return a single result.
166///
167/// # Arguments
168///
169/// * `doc` - The HEDL document to query
170/// * `path` - JSONPath expression
171/// * `config` - Query configuration
172///
173/// # Returns
174///
175/// The first matching JSON value, or None if no matches found
176///
177/// # Examples
178///
179/// ```text
180/// use hedl_json::jsonpath::{query_first, QueryConfig};
181/// use hedl_core::Document;
182///
183/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
184/// let doc = hedl_core::parse_hedl("name: \"Alice\""?;
185/// let config = QueryConfig::default();
186///
187/// let result = query_first(&doc, "$.name", &config)?;
188/// assert!(result.is_some());
189/// # Ok(())
190/// # }
191/// ```
192pub fn query_first(
193    doc: &Document,
194    path: &str,
195    config: &QueryConfig,
196) -> QueryResult<Option<JsonValue>> {
197    let results = query(doc, path, config)?;
198    Ok(results.into_iter().next())
199}
200
201/// Query a HEDL document and return a single expected match
202///
203/// Returns an error if the query returns zero or multiple results.
204///
205/// # Arguments
206///
207/// * `doc` - The HEDL document to query
208/// * `path` - JSONPath expression
209/// * `config` - Query configuration
210///
211/// # Returns
212///
213/// The single matching JSON value
214///
215/// # Errors
216///
217/// Returns error if zero or multiple matches found
218///
219/// # Examples
220///
221/// ```text
222/// use hedl_json::jsonpath::{query_single, QueryConfig};
223/// use hedl_core::Document;
224///
225/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
226/// let doc = hedl_core::parse_hedl("name: \"Alice\""?;
227/// let config = QueryConfig::default();
228///
229/// let result = query_single(&doc, "$.name", &config)?;
230/// assert_eq!(result.as_str(), Some("Alice"));
231/// # Ok(())
232/// # }
233/// ```
234pub fn query_single(doc: &Document, path: &str, config: &QueryConfig) -> QueryResult<JsonValue> {
235    let results = query(doc, path, config)?;
236
237    match results.len() {
238        0 => Err(QueryError::ExecutionError(
239            "Query returned no results".to_string(),
240        )),
241        1 => Ok(results.into_iter().next().unwrap()),
242        n => Err(QueryError::ExecutionError(format!(
243            "Query returned {} results, expected exactly 1",
244            n
245        ))),
246    }
247}
248
249/// Check if a JSONPath query matches any elements in a HEDL document
250///
251/// # Arguments
252///
253/// * `doc` - The HEDL document to query
254/// * `path` - JSONPath expression
255/// * `config` - Query configuration
256///
257/// # Returns
258///
259/// true if at least one match found, false otherwise
260///
261/// # Examples
262///
263/// ```text
264/// use hedl_json::jsonpath::{query_exists, QueryConfig};
265/// use hedl_core::Document;
266///
267/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
268/// let doc = hedl_core::parse_hedl("name: \"Alice\""?;
269/// let config = QueryConfig::default();
270///
271/// assert!(query_exists(&doc, "$.name", &config)?);
272/// assert!(!query_exists(&doc, "$.missing", &config)?);
273/// # Ok(())
274/// # }
275/// ```
276pub fn query_exists(doc: &Document, path: &str, config: &QueryConfig) -> QueryResult<bool> {
277    let results = query(doc, path, config)?;
278    Ok(!results.is_empty())
279}
280
281/// Count the number of matches for a JSONPath query
282///
283/// # Arguments
284///
285/// * `doc` - The HEDL document to query
286/// * `path` - JSONPath expression
287/// * `config` - Query configuration
288///
289/// # Returns
290///
291/// Number of matching elements
292///
293/// # Examples
294///
295/// ```text
296/// use hedl_json::jsonpath::{query_count, QueryConfig};
297/// use hedl_core::Document;
298///
299/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
300/// let doc = hedl_core::parse_hedl("items: [1, 2, 3]")?;
301/// let config = QueryConfig::default();
302///
303/// let count = query_count(&doc, "$.items[*]", &config)?;
304/// assert_eq!(count, 3);
305/// # Ok(())
306/// # }
307/// ```
308pub fn query_count(doc: &Document, path: &str, config: &QueryConfig) -> QueryResult<usize> {
309    let results = query(doc, path, config)?;
310    Ok(results.len())
311}
312
313/// Builder for constructing QueryConfig instances
314#[derive(Debug, Default)]
315pub struct QueryConfigBuilder {
316    include_metadata: bool,
317    flatten_lists: bool,
318    include_children: bool,
319    max_results: usize,
320}
321
322impl QueryConfigBuilder {
323    /// Create a new QueryConfigBuilder
324    pub fn new() -> Self {
325        Self::default()
326    }
327
328    /// Include HEDL metadata in JSON conversion
329    pub fn include_metadata(mut self, value: bool) -> Self {
330        self.include_metadata = value;
331        self
332    }
333
334    /// Flatten matrix lists to plain arrays
335    pub fn flatten_lists(mut self, value: bool) -> Self {
336        self.flatten_lists = value;
337        self
338    }
339
340    /// Include children as nested arrays
341    pub fn include_children(mut self, value: bool) -> Self {
342        self.include_children = value;
343        self
344    }
345
346    /// Set maximum number of results (0 = unlimited)
347    pub fn max_results(mut self, value: usize) -> Self {
348        self.max_results = value;
349        self
350    }
351
352    /// Build the QueryConfig
353    pub fn build(self) -> QueryConfig {
354        QueryConfig {
355            include_metadata: self.include_metadata,
356            flatten_lists: self.flatten_lists,
357            include_children: self.include_children,
358            max_results: self.max_results,
359        }
360    }
361}
362
#[cfg(test)]
mod tests {
    use super::*;
    use hedl_core::parse;

    /// Parse a HEDL snippet for tests, supplying the `%VERSION` header (and
    /// the `---` separator) when the snippet does not already carry one.
    ///
    /// Panics if the resulting text is not accepted by `hedl_core::parse`.
    fn parse_hedl(input: &str) -> Document {
        let has_version = input.contains("%VERSION") || input.starts_with("%HEDL");
        let has_directives = input.contains("%STRUCT") || input.contains("%NEST");

        let hedl = if has_version {
            input.to_string()
        } else if has_directives {
            // Directives without a VERSION line: add one and make sure the
            // header and body end up on either side of a `---` separator.
            let (header, body) = match input.split_once("---") {
                Some((header, body)) => (header.trim().to_string(), body.trim().to_string()),
                None => {
                    // No separator: every `%` line is header, the rest is body.
                    let (header_lines, body_lines): (Vec<&str>, Vec<&str>) = input
                        .lines()
                        .partition(|line| line.trim().starts_with('%'));
                    (header_lines.join("\n"), body_lines.join("\n"))
                }
            };
            format!("%VERSION: 1.0\n{}\n---\n{}", header, body)
        } else {
            format!("%VERSION: 1.0\n---\n{}", input)
        };

        parse(hedl.as_bytes()).expect("test fixture must be valid HEDL")
    }

    // ==================== Basic Query Tests ====================

    #[test]
    fn test_query_simple_field() {
        let doc = parse_hedl("name: \"Alice\"");
        let config = QueryConfig::default();

        let results = query(&doc, "$.name", &config).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].as_str(), Some("Alice"));
    }

    #[test]
    fn test_query_nested_field() {
        let doc = parse_hedl("user:\n  name: \"Bob\"\n  age: 25");
        let config = QueryConfig::default();

        let results = query(&doc, "$.user.name", &config).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].as_str(), Some("Bob"));
    }

    #[test]
    fn test_query_missing_field() {
        let doc = parse_hedl("name: \"Alice\"");
        let config = QueryConfig::default();

        let results = query(&doc, "$.missing", &config).unwrap();
        assert!(results.is_empty());
    }

    #[test]
    fn test_query_root() {
        let doc = parse_hedl("name: \"Alice\"");
        let config = QueryConfig::default();

        let results = query(&doc, "$", &config).unwrap();
        assert_eq!(results.len(), 1);
        assert!(results[0].is_object());
    }

    #[test]
    fn test_query_wildcard() {
        let doc = parse_hedl("a: 1\nb: 2\nc: 3");
        let config = QueryConfig::default();

        let results = query(&doc, "$.*", &config).unwrap();
        assert_eq!(results.len(), 3);
    }

    // ==================== Query Helper Tests ====================

    #[test]
    fn test_query_first_success() {
        let doc = parse_hedl("name: \"Alice\"");
        let config = QueryConfig::default();

        let result = query_first(&doc, "$.name", &config).unwrap();
        assert!(result.is_some());
        assert_eq!(result.unwrap().as_str(), Some("Alice"));
    }

    #[test]
    fn test_query_first_no_match() {
        let doc = parse_hedl("name: \"Alice\"");
        let config = QueryConfig::default();

        let result = query_first(&doc, "$.missing", &config).unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn test_query_single_success() {
        let doc = parse_hedl("name: \"Alice\"");
        let config = QueryConfig::default();

        let result = query_single(&doc, "$.name", &config).unwrap();
        assert_eq!(result.as_str(), Some("Alice"));
    }

    #[test]
    fn test_query_single_no_results() {
        let doc = parse_hedl("name: \"Alice\"");
        let config = QueryConfig::default();

        let result = query_single(&doc, "$.missing", &config);
        assert!(result.is_err());
        assert!(matches!(result.unwrap_err(), QueryError::ExecutionError(_)));
    }

    #[test]
    fn test_query_single_multiple_results() {
        let doc = parse_hedl("a: 1\nb: 2");
        let config = QueryConfig::default();

        let result = query_single(&doc, "$.*", &config);
        assert!(result.is_err());
        assert!(matches!(result.unwrap_err(), QueryError::ExecutionError(_)));
    }

    #[test]
    fn test_query_exists_true() {
        let doc = parse_hedl("name: \"Alice\"");
        let config = QueryConfig::default();

        assert!(query_exists(&doc, "$.name", &config).unwrap());
    }

    #[test]
    fn test_query_exists_false() {
        let doc = parse_hedl("name: \"Alice\"");
        let config = QueryConfig::default();

        assert!(!query_exists(&doc, "$.missing", &config).unwrap());
    }

    #[test]
    fn test_query_count() {
        let doc = parse_hedl("a: 1\nb: 2\nc: 3");
        let config = QueryConfig::default();

        let count = query_count(&doc, "$.*", &config).unwrap();
        assert_eq!(count, 3);
    }

    #[test]
    fn test_query_count_zero() {
        let doc = parse_hedl("name: \"Alice\"");
        let config = QueryConfig::default();

        let count = query_count(&doc, "$.missing", &config).unwrap();
        assert_eq!(count, 0);
    }

    // ==================== Configuration Tests ====================

    #[test]
    fn test_config_builder() {
        let config = QueryConfigBuilder::new()
            .include_metadata(true)
            .flatten_lists(true)
            .include_children(false)
            .max_results(10)
            .build();

        assert!(config.include_metadata);
        assert!(config.flatten_lists);
        assert!(!config.include_children);
        assert_eq!(config.max_results, 10);
    }

    #[test]
    fn test_config_default() {
        let config = QueryConfig::default();
        assert!(!config.include_metadata);
        assert!(!config.flatten_lists);
        assert!(config.include_children);
        assert_eq!(config.max_results, 0);
    }

    #[test]
    fn test_config_max_results() {
        let doc = parse_hedl("a: 1\nb: 2\nc: 3\nd: 4");
        let config = QueryConfigBuilder::new().max_results(2).build();

        let results = query(&doc, "$.*", &config).unwrap();
        assert_eq!(results.len(), 2);
    }

    #[test]
    fn test_config_max_results_above_match_count() {
        // A cap larger than the number of matches must not pad or fail.
        let doc = parse_hedl("a: 1\nb: 2\nc: 3");
        let config = QueryConfigBuilder::new().max_results(10).build();

        let results = query(&doc, "$.*", &config).unwrap();
        assert_eq!(results.len(), 3);
    }

    // ==================== Error Handling Tests ====================

    #[test]
    fn test_invalid_jsonpath_expression() {
        let doc = parse_hedl("name: \"Alice\"");
        let config = QueryConfig::default();

        let result = query(&doc, "$$invalid", &config);
        assert!(result.is_err());
        assert!(matches!(result.unwrap_err(), QueryError::InvalidExpression(_)));
    }

    #[test]
    fn test_error_display() {
        let err = QueryError::InvalidExpression("test error".to_string());
        let msg = err.to_string();
        assert!(msg.contains("Invalid JSONPath expression"));
        assert!(msg.contains("test error"));
    }

    #[test]
    fn test_error_equality() {
        let err1 = QueryError::InvalidExpression("test".to_string());
        let err2 = QueryError::InvalidExpression("test".to_string());
        assert_eq!(err1, err2);
    }

    #[test]
    fn test_error_clone() {
        let err1 = QueryError::ConversionError("test".to_string());
        let err2 = err1.clone();
        assert_eq!(err1, err2);
    }

    // ==================== Complex Query Tests ====================

    #[test]
    fn test_query_nested_objects() {
        let doc = parse_hedl("user:\n  profile:\n    name: \"Alice\"\n    age: 30");
        let config = QueryConfig::default();

        let results = query(&doc, "$.user.profile.name", &config).unwrap();
        assert_eq!(results[0].as_str(), Some("Alice"));
    }

    #[test]
    fn test_query_multiple_values() {
        let doc = parse_hedl("a: 1\nb: 2\nc: 3");
        let config = QueryConfig::default();

        let results = query(&doc, "$.*", &config).unwrap();
        assert_eq!(results.len(), 3);

        let sum: i64 = results.iter().filter_map(|v| v.as_i64()).sum();
        assert_eq!(sum, 6);
    }

    #[test]
    fn test_query_with_numbers() {
        let doc = parse_hedl("count: 42\nprice: 19.99");
        let config = QueryConfig::default();

        let count = query_single(&doc, "$.count", &config).unwrap();
        assert_eq!(count.as_i64(), Some(42));

        let price = query_single(&doc, "$.price", &config).unwrap();
        assert_eq!(price.as_f64(), Some(19.99));
    }

    #[test]
    fn test_query_with_booleans() {
        let doc = parse_hedl("active: true\ndeleted: false");
        let config = QueryConfig::default();

        let active = query_single(&doc, "$.active", &config).unwrap();
        assert_eq!(active.as_bool(), Some(true));

        let deleted = query_single(&doc, "$.deleted", &config).unwrap();
        assert_eq!(deleted.as_bool(), Some(false));
    }

    #[test]
    fn test_query_with_null() {
        let doc = parse_hedl("value: ~");
        let config = QueryConfig::default();

        let result = query_single(&doc, "$.value", &config).unwrap();
        assert!(result.is_null());
    }

    // ==================== Edge Cases ====================

    #[test]
    fn test_query_empty_document() {
        let doc = parse_hedl("");
        let config = QueryConfig::default();

        let results = query(&doc, "$", &config).unwrap();
        assert_eq!(results.len(), 1);
        assert!(results[0].is_object());
    }

    #[test]
    fn test_query_unicode_fields() {
        // HEDL field names must be ASCII identifiers, but values can be unicode
        let doc = parse_hedl("name: \"太郎\"");
        let config = QueryConfig::default();

        let results = query(&doc, "$.name", &config).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].as_str(), Some("太郎"));
    }

    #[test]
    fn test_query_with_special_characters() {
        // HEDL field names must be valid identifiers (no hyphens)
        // Use underscore instead, and bracket notation still works
        let doc = parse_hedl("field_name: \"value\"");
        let config = QueryConfig::default();

        let results = query(&doc, "$['field_name']", &config).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].as_str(), Some("value"));
    }

    #[test]
    fn test_query_max_results_zero() {
        let doc = parse_hedl("a: 1\nb: 2\nc: 3");
        let config = QueryConfigBuilder::new().max_results(0).build();

        let results = query(&doc, "$.*", &config).unwrap();
        assert_eq!(results.len(), 3); // 0 means unlimited
    }

    // ==================== Integration Tests ====================

    #[test]
    fn test_query_builder_chain() {
        let config = QueryConfigBuilder::new()
            .include_metadata(true)
            .flatten_lists(false)
            .max_results(5)
            .include_children(true)
            .build();

        assert!(config.include_metadata);
        assert!(!config.flatten_lists);
        assert!(config.include_children);
        assert_eq!(config.max_results, 5);
    }

    #[test]
    fn test_config_to_json_config_conversion() {
        let query_config = QueryConfigBuilder::new()
            .include_metadata(true)
            .flatten_lists(true)
            .include_children(false)
            .build();

        let json_config: ToJsonConfig = (&query_config).into();

        assert!(json_config.include_metadata);
        assert!(json_config.flatten_lists);
        assert!(!json_config.include_children);
    }
}
735}