Skip to main content

hedl_json/
jsonpath.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! `JSONPath` query support for HEDL documents
19//!
20//! This module provides `JSONPath` query functionality for HEDL documents,
21//! allowing efficient extraction of specific data using standard `JSONPath` syntax.
22//!
23//! # Features
24//!
25//! - **Standard `JSONPath` Syntax**: Full support for `JSONPath` expressions
26//! - **Efficient Queries**: Optimized query execution with minimal allocations
27//! - **Type-Safe Results**: Returns strongly-typed query results
28//! - **Error Handling**: Comprehensive error reporting for invalid queries
29//!
30//! # Examples
31//!
32//! ```text
33//! use hedl_json::jsonpath::{query, QueryConfig};
34//! use hedl_core::Document;
35//!
36//! fn example() -> Result<(), Box<dyn std::error::Error>> {
37//!     let doc = hedl_core::parse("name: \"Alice\"\nage: 30".as_bytes())?;
38//!     let config = QueryConfig::default();
39//!
40//!     // Simple field access
41//!     let results = query(&doc, "$.name", &config)?;
42//!     assert_eq!(results.len(), 1);
43//!
44//!     // Array filtering
45//!     let results = query(&doc, "$.users[?(@.age > 25)]", &config)?;
46//!     Ok(())
47//! }
48//! ```
49
50use hedl_core::Document;
51use serde_json::Value as JsonValue;
52use serde_json_path::JsonPath;
53use std::str::FromStr;
54use thiserror::Error;
55
56use crate::{to_json_value, ToJsonConfig};
57
58/// Errors that can occur during `JSONPath` queries
59#[derive(Debug, Error, Clone, PartialEq)]
60pub enum QueryError {
61    /// Invalid `JSONPath` expression
62    #[error("Invalid JSONPath expression: {0}")]
63    InvalidExpression(String),
64
65    /// Document conversion error
66    #[error("Failed to convert HEDL document to JSON: {0}")]
67    ConversionError(String),
68
69    /// Query execution error
70    #[error("Query execution failed: {0}")]
71    ExecutionError(String),
72}
73
74/// Result type for `JSONPath` queries
75pub type QueryResult<T> = Result<T, QueryError>;
76
/// Configuration for `JSONPath` queries.
///
/// The three boolean switches control how the HEDL document is converted to
/// JSON before the query runs; `max_results` limits how many matches are
/// returned afterwards.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct QueryConfig {
    /// Include HEDL metadata in JSON conversion
    pub include_metadata: bool,

    /// Flatten matrix lists to plain arrays
    pub flatten_lists: bool,

    /// Include children as nested arrays
    pub include_children: bool,

    /// Maximum number of results to return (0 = unlimited)
    pub max_results: usize,
}
92
93impl Default for QueryConfig {
94    fn default() -> Self {
95        Self {
96            include_metadata: false,
97            flatten_lists: false,
98            include_children: true,
99            max_results: 0, // Unlimited
100        }
101    }
102}
103
104impl From<&QueryConfig> for ToJsonConfig {
105    fn from(config: &QueryConfig) -> Self {
106        ToJsonConfig {
107            include_metadata: config.include_metadata,
108            flatten_lists: config.flatten_lists,
109            include_children: config.include_children,
110            ascii_safe: false,
111        }
112    }
113}
114
115/// Query a HEDL document using `JSONPath` expression
116///
117/// # Arguments
118///
119/// * `doc` - The HEDL document to query
120/// * `path` - `JSONPath` expression (e.g., `$.users[*].name`)
121/// * `config` - Query configuration
122///
123/// # Returns
124///
125/// Vector of matching JSON values
126///
127/// # Examples
128///
129/// ```text
130/// use hedl_json::jsonpath::{query, QueryConfig};
131/// use hedl_core::Document;
132///
133/// fn example() -> Result<(), Box<dyn std::error::Error>> {
134///     let doc = hedl_core::parse("users: [@User]\n  u1 Alice 30".as_bytes())?;
135///     let config = QueryConfig::default();
136///
137///     let results = query(&doc, "$.users", &config)?;
138///     assert!(!results.is_empty());
139///     Ok(())
140/// }
141/// ```
142pub fn query(doc: &Document, path: &str, config: &QueryConfig) -> QueryResult<Vec<JsonValue>> {
143    // Convert HEDL document to JSON
144    let json_config: ToJsonConfig = config.into();
145    let json_value = to_json_value(doc, &json_config).map_err(QueryError::ConversionError)?;
146
147    // Parse JSONPath expression
148    let json_path =
149        JsonPath::from_str(path).map_err(|e| QueryError::InvalidExpression(format!("{e}")))?;
150
151    // Execute query
152    let node_list = json_path.query(&json_value);
153
154    // Collect results with optional limit
155    let results: Vec<JsonValue> = if config.max_results > 0 {
156        node_list
157            .into_iter()
158            .take(config.max_results)
159            .cloned()
160            .collect()
161    } else {
162        node_list.all().into_iter().cloned().collect()
163    };
164
165    Ok(results)
166}
167
168/// Query a HEDL document and return the first match
169///
170/// Convenience function for queries expected to return a single result.
171///
172/// # Arguments
173///
174/// * `doc` - The HEDL document to query
175/// * `path` - `JSONPath` expression
176/// * `config` - Query configuration
177///
178/// # Returns
179///
180/// The first matching JSON value, or None if no matches found
181///
182/// # Examples
183///
184/// ```text
185/// use hedl_json::jsonpath::{query_first, QueryConfig};
186/// use hedl_core::Document;
187///
188/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
189/// let doc = hedl_core::parse_hedl("name: \"Alice\""?;
190/// let config = QueryConfig::default();
191///
192/// let result = query_first(&doc, "$.name", &config)?;
193/// assert!(result.is_some());
194/// # Ok(())
195/// # }
196/// ```
197pub fn query_first(
198    doc: &Document,
199    path: &str,
200    config: &QueryConfig,
201) -> QueryResult<Option<JsonValue>> {
202    let results = query(doc, path, config)?;
203    Ok(results.into_iter().next())
204}
205
206/// Query a HEDL document and return a single expected match
207///
208/// Returns an error if the query returns zero or multiple results.
209///
210/// # Arguments
211///
212/// * `doc` - The HEDL document to query
213/// * `path` - `JSONPath` expression
214/// * `config` - Query configuration
215///
216/// # Returns
217///
218/// The single matching JSON value
219///
220/// # Errors
221///
222/// Returns error if zero or multiple matches found
223///
224/// # Examples
225///
226/// ```text
227/// use hedl_json::jsonpath::{query_single, QueryConfig};
228/// use hedl_core::Document;
229///
230/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
231/// let doc = hedl_core::parse_hedl("name: \"Alice\""?;
232/// let config = QueryConfig::default();
233///
234/// let result = query_single(&doc, "$.name", &config)?;
235/// assert_eq!(result.as_str(), Some("Alice"));
236/// # Ok(())
237/// # }
238/// ```
239pub fn query_single(doc: &Document, path: &str, config: &QueryConfig) -> QueryResult<JsonValue> {
240    let results = query(doc, path, config)?;
241
242    match results.len() {
243        0 => Err(QueryError::ExecutionError(
244            "Query returned no results".to_string(),
245        )),
246        // SAFETY: len == 1 guarantees next() returns Some
247        1 => Ok(results.into_iter().next().expect("single-element vec")),
248        n => Err(QueryError::ExecutionError(format!(
249            "Query returned {n} results, expected exactly 1"
250        ))),
251    }
252}
253
254/// Check if a `JSONPath` query matches any elements in a HEDL document
255///
256/// # Arguments
257///
258/// * `doc` - The HEDL document to query
259/// * `path` - `JSONPath` expression
260/// * `config` - Query configuration
261///
262/// # Returns
263///
264/// true if at least one match found, false otherwise
265///
266/// # Examples
267///
268/// ```text
269/// use hedl_json::jsonpath::{query_exists, QueryConfig};
270/// use hedl_core::Document;
271///
272/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
273/// let doc = hedl_core::parse_hedl("name: \"Alice\""?;
274/// let config = QueryConfig::default();
275///
276/// assert!(query_exists(&doc, "$.name", &config)?);
277/// assert!(!query_exists(&doc, "$.missing", &config)?);
278/// # Ok(())
279/// # }
280/// ```
281pub fn query_exists(doc: &Document, path: &str, config: &QueryConfig) -> QueryResult<bool> {
282    let results = query(doc, path, config)?;
283    Ok(!results.is_empty())
284}
285
286/// Count the number of matches for a `JSONPath` query
287///
288/// # Arguments
289///
290/// * `doc` - The HEDL document to query
291/// * `path` - `JSONPath` expression
292/// * `config` - Query configuration
293///
294/// # Returns
295///
296/// Number of matching elements
297///
298/// # Examples
299///
300/// ```text
301/// use hedl_json::jsonpath::{query_count, QueryConfig};
302/// use hedl_core::Document;
303///
304/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
305/// let doc = hedl_core::parse_hedl("items: [1, 2, 3]")?;
306/// let config = QueryConfig::default();
307///
308/// let count = query_count(&doc, "$.items[*]", &config)?;
309/// assert_eq!(count, 3);
310/// # Ok(())
311/// # }
312/// ```
313pub fn query_count(doc: &Document, path: &str, config: &QueryConfig) -> QueryResult<usize> {
314    let results = query(doc, path, config)?;
315    Ok(results.len())
316}
317
318/// Builder for constructing `QueryConfig` instances
319#[derive(Debug, Default)]
320pub struct QueryConfigBuilder {
321    include_metadata: bool,
322    flatten_lists: bool,
323    include_children: bool,
324    max_results: usize,
325}
326
327impl QueryConfigBuilder {
328    /// Create a new `QueryConfigBuilder`
329    #[must_use]
330    pub fn new() -> Self {
331        Self::default()
332    }
333
334    /// Include HEDL metadata in JSON conversion
335    #[must_use]
336    pub fn include_metadata(mut self, value: bool) -> Self {
337        self.include_metadata = value;
338        self
339    }
340
341    /// Flatten matrix lists to plain arrays
342    #[must_use]
343    pub fn flatten_lists(mut self, value: bool) -> Self {
344        self.flatten_lists = value;
345        self
346    }
347
348    /// Include children as nested arrays
349    #[must_use]
350    pub fn include_children(mut self, value: bool) -> Self {
351        self.include_children = value;
352        self
353    }
354
355    /// Set maximum number of results (0 = unlimited)
356    #[must_use]
357    pub fn max_results(mut self, value: usize) -> Self {
358        self.max_results = value;
359        self
360    }
361
362    /// Build the `QueryConfig`
363    #[must_use]
364    pub fn build(self) -> QueryConfig {
365        QueryConfig {
366            include_metadata: self.include_metadata,
367            flatten_lists: self.flatten_lists,
368            include_children: self.include_children,
369            max_results: self.max_results,
370        }
371    }
372}
373
#[cfg(test)]
mod tests {
    use super::*;
    use hedl_core::parse;

    /// Parses a HEDL snippet for a test, adding the header lines when the
    /// snippet does not already carry them.
    ///
    /// Three input shapes are handled:
    /// * input that already names a version is parsed untouched;
    /// * input with `%STRUCT` / `%NEST` directives gets the version header
    ///   placed in front of its directives, followed by the `---` separator
    ///   and the body;
    /// * anything else is treated as a bare body behind a generated header.
    fn parse_hedl(input: &str) -> Document {
        let already_versioned = input.contains("%VERSION") || input.starts_with("%HEDL");
        let has_directives = input.contains("%STRUCT") || input.contains("%NEST");

        let source = if already_versioned {
            input.to_string()
        } else if has_directives {
            let (header, body) = match input.split_once("---") {
                // An explicit separator is present: split around its first occurrence.
                Some((head, rest)) => (head.trim().to_string(), rest.trim().to_string()),
                // No separator: directive lines form the header, the rest is the body.
                None => {
                    let (directives, content): (Vec<&str>, Vec<&str>) = input
                        .lines()
                        .partition(|line| line.trim().starts_with('%'));
                    (directives.join("\n"), content.join("\n"))
                }
            };
            format!("%V:2.0\n%NULL:~\n%QUOTE:\"\n{header}\n---\n{body}")
        } else {
            format!("%V:2.0\n%NULL:~\n%QUOTE:\"\n---\n{input}")
        };

        parse(source.as_bytes()).unwrap()
    }

    // -------------------- basic queries --------------------

    #[test]
    fn test_query_simple_field() {
        let document = parse_hedl("name: \"Alice\"");

        let matches = query(&document, "$.name", &QueryConfig::default()).unwrap();

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].as_str(), Some("Alice"));
    }

    #[test]
    fn test_query_nested_field() {
        let document = parse_hedl("user:\n name: \"Bob\"\n age: 25");

        let matches = query(&document, "$.user.name", &QueryConfig::default()).unwrap();

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].as_str(), Some("Bob"));
    }

    #[test]
    fn test_query_missing_field() {
        let document = parse_hedl("name: \"Alice\"");

        let matches = query(&document, "$.missing", &QueryConfig::default()).unwrap();

        assert!(matches.is_empty());
    }

    #[test]
    fn test_query_root() {
        let document = parse_hedl("name: \"Alice\"");

        let matches = query(&document, "$", &QueryConfig::default()).unwrap();

        assert_eq!(matches.len(), 1);
        assert!(matches[0].is_object());
    }

    #[test]
    fn test_query_wildcard() {
        let document = parse_hedl("a: 1\nb: 2\nc: 3");

        let matches = query(&document, "$.*", &QueryConfig::default()).unwrap();

        assert_eq!(matches.len(), 3);
    }

    // -------------------- helper functions --------------------

    #[test]
    fn test_query_first_success() {
        let document = parse_hedl("name: \"Alice\"");

        let found = query_first(&document, "$.name", &QueryConfig::default()).unwrap();

        assert!(found.is_some());
        assert_eq!(found.as_ref().and_then(|v| v.as_str()), Some("Alice"));
    }

    #[test]
    fn test_query_first_no_match() {
        let document = parse_hedl("name: \"Alice\"");

        let found = query_first(&document, "$.missing", &QueryConfig::default()).unwrap();

        assert!(found.is_none());
    }

    #[test]
    fn test_query_single_success() {
        let document = parse_hedl("name: \"Alice\"");

        let value = query_single(&document, "$.name", &QueryConfig::default()).unwrap();

        assert_eq!(value.as_str(), Some("Alice"));
    }

    #[test]
    fn test_query_single_no_results() {
        let document = parse_hedl("name: \"Alice\"");

        let outcome = query_single(&document, "$.missing", &QueryConfig::default());

        assert!(matches!(outcome, Err(QueryError::ExecutionError(_))));
    }

    #[test]
    fn test_query_single_multiple_results() {
        let document = parse_hedl("a: 1\nb: 2");

        let outcome = query_single(&document, "$.*", &QueryConfig::default());

        assert!(matches!(outcome, Err(QueryError::ExecutionError(_))));
    }

    #[test]
    fn test_query_exists_true() {
        let document = parse_hedl("name: \"Alice\"");

        let present = query_exists(&document, "$.name", &QueryConfig::default()).unwrap();

        assert!(present);
    }

    #[test]
    fn test_query_exists_false() {
        let document = parse_hedl("name: \"Alice\"");

        let present = query_exists(&document, "$.missing", &QueryConfig::default()).unwrap();

        assert!(!present);
    }

    #[test]
    fn test_query_count() {
        let document = parse_hedl("a: 1\nb: 2\nc: 3");

        let total = query_count(&document, "$.*", &QueryConfig::default()).unwrap();

        assert_eq!(total, 3);
    }

    #[test]
    fn test_query_count_zero() {
        let document = parse_hedl("name: \"Alice\"");

        let total = query_count(&document, "$.missing", &QueryConfig::default()).unwrap();

        assert_eq!(total, 0);
    }

    // -------------------- configuration --------------------

    #[test]
    fn test_config_builder() {
        let built = QueryConfigBuilder::new()
            .include_metadata(true)
            .flatten_lists(true)
            .include_children(false)
            .max_results(10)
            .build();

        assert!(built.include_metadata);
        assert!(built.flatten_lists);
        assert!(!built.include_children);
        assert_eq!(built.max_results, 10);
    }

    #[test]
    fn test_config_default() {
        let defaults = QueryConfig::default();

        assert!(!defaults.include_metadata);
        assert!(!defaults.flatten_lists);
        assert!(defaults.include_children);
        assert_eq!(defaults.max_results, 0);
    }

    #[test]
    fn test_config_max_results() {
        let document = parse_hedl("a: 1\nb: 2\nc: 3\nd: 4");
        let capped = QueryConfigBuilder::new().max_results(2).build();

        let matches = query(&document, "$.*", &capped).unwrap();

        assert_eq!(matches.len(), 2);
    }

    // -------------------- error handling --------------------

    #[test]
    fn test_invalid_jsonpath_expression() {
        let document = parse_hedl("name: \"Alice\"");

        let outcome = query(&document, "$$invalid", &QueryConfig::default());

        assert!(matches!(outcome, Err(QueryError::InvalidExpression(_))));
    }

    #[test]
    fn test_error_display() {
        let rendered = QueryError::InvalidExpression("test error".to_string()).to_string();

        assert!(rendered.contains("Invalid JSONPath expression"));
        assert!(rendered.contains("test error"));
    }

    #[test]
    fn test_error_equality() {
        let left = QueryError::InvalidExpression("test".to_string());
        let right = QueryError::InvalidExpression("test".to_string());

        assert_eq!(left, right);
    }

    #[test]
    fn test_error_clone() {
        let original = QueryError::ConversionError("test".to_string());
        let copy = original.clone();

        assert_eq!(original, copy);
    }

    // -------------------- richer queries --------------------

    #[test]
    fn test_query_nested_objects() {
        let document = parse_hedl("user:\n profile:\n  name: \"Alice\"\n  age: 30");

        let matches = query(&document, "$.user.profile.name", &QueryConfig::default()).unwrap();

        assert_eq!(matches[0].as_str(), Some("Alice"));
    }

    #[test]
    fn test_query_multiple_values() {
        let document = parse_hedl("a: 1\nb: 2\nc: 3");

        let matches = query(&document, "$.*", &QueryConfig::default()).unwrap();
        assert_eq!(matches.len(), 3);

        // Values that are not integers are skipped rather than failing the sum.
        let total: i64 = matches.iter().filter_map(serde_json::Value::as_i64).sum();
        assert_eq!(total, 6);
    }

    #[test]
    fn test_query_with_numbers() {
        let document = parse_hedl("count: 42\nprice: 19.99");
        let defaults = QueryConfig::default();

        let whole = query_single(&document, "$.count", &defaults).unwrap();
        let fractional = query_single(&document, "$.price", &defaults).unwrap();

        assert_eq!(whole.as_i64(), Some(42));
        assert_eq!(fractional.as_f64(), Some(19.99));
    }

    #[test]
    fn test_query_with_booleans() {
        let document = parse_hedl("active: true\ndeleted: false");
        let defaults = QueryConfig::default();

        let on = query_single(&document, "$.active", &defaults).unwrap();
        let off = query_single(&document, "$.deleted", &defaults).unwrap();

        assert_eq!(on.as_bool(), Some(true));
        assert_eq!(off.as_bool(), Some(false));
    }

    #[test]
    fn test_query_with_null() {
        let document = parse_hedl("value: ~");

        let value = query_single(&document, "$.value", &QueryConfig::default()).unwrap();

        assert!(value.is_null());
    }

    // -------------------- edge cases --------------------

    #[test]
    fn test_query_empty_document() {
        let document = parse_hedl("");

        let matches = query(&document, "$", &QueryConfig::default()).unwrap();

        assert_eq!(matches.len(), 1);
        assert!(matches[0].is_object());
    }

    #[test]
    fn test_query_unicode_fields() {
        // Field names have to be ASCII identifiers; the value is where the
        // non-ASCII text goes.
        let document = parse_hedl("name: \"太郎\"");

        let matches = query(&document, "$.name", &QueryConfig::default()).unwrap();

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].as_str(), Some("太郎"));
    }

    #[test]
    fn test_query_with_special_characters() {
        // Hyphens are not valid in HEDL field names, so an underscore is used
        // to exercise bracket notation instead.
        let document = parse_hedl("field_name: \"value\"");

        let matches = query(&document, "$['field_name']", &QueryConfig::default()).unwrap();

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].as_str(), Some("value"));
    }

    #[test]
    fn test_query_max_results_zero() {
        let document = parse_hedl("a: 1\nb: 2\nc: 3");
        let unlimited = QueryConfigBuilder::new().max_results(0).build();

        let matches = query(&document, "$.*", &unlimited).unwrap();

        // A limit of zero stands for "no limit".
        assert_eq!(matches.len(), 3);
    }

    // -------------------- integration --------------------

    #[test]
    fn test_query_builder_chain() {
        let built = QueryConfigBuilder::new()
            .include_metadata(true)
            .flatten_lists(false)
            .max_results(5)
            .include_children(true)
            .build();

        assert!(built.include_metadata);
        assert!(!built.flatten_lists);
        assert!(built.include_children);
        assert_eq!(built.max_results, 5);
    }

    #[test]
    fn test_config_to_json_config_conversion() {
        let source = QueryConfigBuilder::new()
            .include_metadata(true)
            .flatten_lists(true)
            .include_children(false)
            .build();

        let converted = ToJsonConfig::from(&source);

        assert!(converted.include_metadata);
        assert!(converted.flatten_lists);
        assert!(!converted.include_children);
    }
}