hedl_json/
jsonpath.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! `JSONPath` query support for HEDL documents
19//!
20//! This module provides `JSONPath` query functionality for HEDL documents,
21//! allowing efficient extraction of specific data using standard `JSONPath` syntax.
22//!
23//! # Features
24//!
25//! - **Standard `JSONPath` Syntax**: Full support for `JSONPath` expressions
26//! - **Efficient Queries**: Optimized query execution with minimal allocations
27//! - **Type-Safe Results**: Returns strongly-typed query results
28//! - **Error Handling**: Comprehensive error reporting for invalid queries
29//!
30//! # Examples
31//!
32//! ```text
33//! use hedl_json::jsonpath::{query, QueryConfig};
34//! use hedl_core::Document;
35//!
36//! fn example() -> Result<(), Box<dyn std::error::Error>> {
37//!     let doc = hedl_core::parse("name: \"Alice\"\nage: 30".as_bytes())?;
38//!     let config = QueryConfig::default();
39//!
40//!     // Simple field access
41//!     let results = query(&doc, "$.name", &config)?;
42//!     assert_eq!(results.len(), 1);
43//!
44//!     // Array filtering
45//!     let results = query(&doc, "$.users[?(@.age > 25)]", &config)?;
46//!     Ok(())
47//! }
48//! ```
49
50use hedl_core::Document;
51use serde_json::Value as JsonValue;
52use serde_json_path::JsonPath;
53use std::str::FromStr;
54use thiserror::Error;
55
56use crate::{to_json_value, ToJsonConfig};
57
58/// Errors that can occur during `JSONPath` queries
59#[derive(Debug, Error, Clone, PartialEq)]
60pub enum QueryError {
61    /// Invalid `JSONPath` expression
62    #[error("Invalid JSONPath expression: {0}")]
63    InvalidExpression(String),
64
65    /// Document conversion error
66    #[error("Failed to convert HEDL document to JSON: {0}")]
67    ConversionError(String),
68
69    /// Query execution error
70    #[error("Query execution failed: {0}")]
71    ExecutionError(String),
72}
73
74/// Result type for `JSONPath` queries
75pub type QueryResult<T> = Result<T, QueryError>;
76
/// Options controlling how a HEDL document is converted and queried.
///
/// The first three flags are forwarded to the HEDL-to-JSON conversion;
/// `max_results` is applied to the list of matches afterwards.
#[derive(Clone, Debug)]
pub struct QueryConfig {
    /// Whether HEDL metadata is included in the converted JSON.
    pub include_metadata: bool,

    /// Whether matrix lists are flattened to plain arrays.
    pub flatten_lists: bool,

    /// Whether children are included as nested arrays.
    pub include_children: bool,

    /// Upper bound on the number of returned matches; `0` means no limit.
    pub max_results: usize,
}
92
93impl Default for QueryConfig {
94    fn default() -> Self {
95        Self {
96            include_metadata: false,
97            flatten_lists: false,
98            include_children: true,
99            max_results: 0, // Unlimited
100        }
101    }
102}
103
104impl From<&QueryConfig> for ToJsonConfig {
105    fn from(config: &QueryConfig) -> Self {
106        ToJsonConfig {
107            include_metadata: config.include_metadata,
108            flatten_lists: config.flatten_lists,
109            include_children: config.include_children,
110            ascii_safe: false,
111        }
112    }
113}
114
115/// Query a HEDL document using `JSONPath` expression
116///
117/// # Arguments
118///
119/// * `doc` - The HEDL document to query
120/// * `path` - `JSONPath` expression (e.g., "$.users[*].name")
121/// * `config` - Query configuration
122///
123/// # Returns
124///
125/// Vector of matching JSON values
126///
127/// # Examples
128///
129/// ```text
130/// use hedl_json::jsonpath::{query, QueryConfig};
131/// use hedl_core::Document;
132///
133/// fn example() -> Result<(), Box<dyn std::error::Error>> {
134///     let doc = hedl_core::parse("users: [@User]\n  u1 Alice 30".as_bytes())?;
135///     let config = QueryConfig::default();
136///
137///     let results = query(&doc, "$.users", &config)?;
138///     assert!(!results.is_empty());
139///     Ok(())
140/// }
141/// ```
142pub fn query(doc: &Document, path: &str, config: &QueryConfig) -> QueryResult<Vec<JsonValue>> {
143    // Convert HEDL document to JSON
144    let json_config: ToJsonConfig = config.into();
145    let json_value = to_json_value(doc, &json_config).map_err(QueryError::ConversionError)?;
146
147    // Parse JSONPath expression
148    let json_path =
149        JsonPath::from_str(path).map_err(|e| QueryError::InvalidExpression(format!("{e}")))?;
150
151    // Execute query
152    let node_list = json_path.query(&json_value);
153
154    // Collect results with optional limit
155    let results: Vec<JsonValue> = if config.max_results > 0 {
156        node_list
157            .into_iter()
158            .take(config.max_results)
159            .cloned()
160            .collect()
161    } else {
162        node_list.all().into_iter().cloned().collect()
163    };
164
165    Ok(results)
166}
167
168/// Query a HEDL document and return the first match
169///
170/// Convenience function for queries expected to return a single result.
171///
172/// # Arguments
173///
174/// * `doc` - The HEDL document to query
175/// * `path` - `JSONPath` expression
176/// * `config` - Query configuration
177///
178/// # Returns
179///
180/// The first matching JSON value, or None if no matches found
181///
182/// # Examples
183///
184/// ```text
185/// use hedl_json::jsonpath::{query_first, QueryConfig};
186/// use hedl_core::Document;
187///
188/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
189/// let doc = hedl_core::parse_hedl("name: \"Alice\""?;
190/// let config = QueryConfig::default();
191///
192/// let result = query_first(&doc, "$.name", &config)?;
193/// assert!(result.is_some());
194/// # Ok(())
195/// # }
196/// ```
197pub fn query_first(
198    doc: &Document,
199    path: &str,
200    config: &QueryConfig,
201) -> QueryResult<Option<JsonValue>> {
202    let results = query(doc, path, config)?;
203    Ok(results.into_iter().next())
204}
205
206/// Query a HEDL document and return a single expected match
207///
208/// Returns an error if the query returns zero or multiple results.
209///
210/// # Arguments
211///
212/// * `doc` - The HEDL document to query
213/// * `path` - `JSONPath` expression
214/// * `config` - Query configuration
215///
216/// # Returns
217///
218/// The single matching JSON value
219///
220/// # Errors
221///
222/// Returns error if zero or multiple matches found
223///
224/// # Examples
225///
226/// ```text
227/// use hedl_json::jsonpath::{query_single, QueryConfig};
228/// use hedl_core::Document;
229///
230/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
231/// let doc = hedl_core::parse_hedl("name: \"Alice\""?;
232/// let config = QueryConfig::default();
233///
234/// let result = query_single(&doc, "$.name", &config)?;
235/// assert_eq!(result.as_str(), Some("Alice"));
236/// # Ok(())
237/// # }
238/// ```
239pub fn query_single(doc: &Document, path: &str, config: &QueryConfig) -> QueryResult<JsonValue> {
240    let results = query(doc, path, config)?;
241
242    match results.len() {
243        0 => Err(QueryError::ExecutionError(
244            "Query returned no results".to_string(),
245        )),
246        1 => Ok(results.into_iter().next().unwrap()),
247        n => Err(QueryError::ExecutionError(format!(
248            "Query returned {n} results, expected exactly 1"
249        ))),
250    }
251}
252
253/// Check if a `JSONPath` query matches any elements in a HEDL document
254///
255/// # Arguments
256///
257/// * `doc` - The HEDL document to query
258/// * `path` - `JSONPath` expression
259/// * `config` - Query configuration
260///
261/// # Returns
262///
263/// true if at least one match found, false otherwise
264///
265/// # Examples
266///
267/// ```text
268/// use hedl_json::jsonpath::{query_exists, QueryConfig};
269/// use hedl_core::Document;
270///
271/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
272/// let doc = hedl_core::parse_hedl("name: \"Alice\""?;
273/// let config = QueryConfig::default();
274///
275/// assert!(query_exists(&doc, "$.name", &config)?);
276/// assert!(!query_exists(&doc, "$.missing", &config)?);
277/// # Ok(())
278/// # }
279/// ```
280pub fn query_exists(doc: &Document, path: &str, config: &QueryConfig) -> QueryResult<bool> {
281    let results = query(doc, path, config)?;
282    Ok(!results.is_empty())
283}
284
285/// Count the number of matches for a `JSONPath` query
286///
287/// # Arguments
288///
289/// * `doc` - The HEDL document to query
290/// * `path` - `JSONPath` expression
291/// * `config` - Query configuration
292///
293/// # Returns
294///
295/// Number of matching elements
296///
297/// # Examples
298///
299/// ```text
300/// use hedl_json::jsonpath::{query_count, QueryConfig};
301/// use hedl_core::Document;
302///
303/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
304/// let doc = hedl_core::parse_hedl("items: [1, 2, 3]")?;
305/// let config = QueryConfig::default();
306///
307/// let count = query_count(&doc, "$.items[*]", &config)?;
308/// assert_eq!(count, 3);
309/// # Ok(())
310/// # }
311/// ```
312pub fn query_count(doc: &Document, path: &str, config: &QueryConfig) -> QueryResult<usize> {
313    let results = query(doc, path, config)?;
314    Ok(results.len())
315}
316
317/// Builder for constructing `QueryConfig` instances
318#[derive(Debug, Default)]
319pub struct QueryConfigBuilder {
320    include_metadata: bool,
321    flatten_lists: bool,
322    include_children: bool,
323    max_results: usize,
324}
325
326impl QueryConfigBuilder {
327    /// Create a new `QueryConfigBuilder`
328    #[must_use]
329    pub fn new() -> Self {
330        Self::default()
331    }
332
333    /// Include HEDL metadata in JSON conversion
334    #[must_use]
335    pub fn include_metadata(mut self, value: bool) -> Self {
336        self.include_metadata = value;
337        self
338    }
339
340    /// Flatten matrix lists to plain arrays
341    #[must_use]
342    pub fn flatten_lists(mut self, value: bool) -> Self {
343        self.flatten_lists = value;
344        self
345    }
346
347    /// Include children as nested arrays
348    #[must_use]
349    pub fn include_children(mut self, value: bool) -> Self {
350        self.include_children = value;
351        self
352    }
353
354    /// Set maximum number of results (0 = unlimited)
355    #[must_use]
356    pub fn max_results(mut self, value: usize) -> Self {
357        self.max_results = value;
358        self
359    }
360
361    /// Build the `QueryConfig`
362    #[must_use]
363    pub fn build(self) -> QueryConfig {
364        QueryConfig {
365            include_metadata: self.include_metadata,
366            flatten_lists: self.flatten_lists,
367            include_children: self.include_children,
368            max_results: self.max_results,
369        }
370    }
371}
372
#[cfg(test)]
mod tests {
    use super::*;
    use hedl_core::parse;

    /// Parse a HEDL snippet for tests, supplying any missing header parts.
    ///
    /// * Input that already mentions `%VERSION` (or starts with `%HEDL`) is
    ///   parsed as-is.
    /// * Input with `%STRUCT`/`%NEST` directives gets a `%VERSION` line, and
    ///   its directives are placed before a `---` separator.
    /// * Anything else is treated as a bare body and gets a minimal header.
    ///
    /// Panics if the resulting text does not parse.
    fn parse_hedl(input: &str) -> Document {
        let has_version = input.contains("%VERSION") || input.starts_with("%HEDL");
        let has_directives = input.contains("%STRUCT") || input.contains("%NEST");

        let hedl = if has_version {
            input.to_string()
        } else if has_directives {
            let (header, body) = match input.split_once("---") {
                // An explicit separator: everything before it is the header.
                Some((head, tail)) => (head.trim().to_string(), tail.trim().to_string()),
                // No separator: lines starting with `%` form the header.
                None => {
                    let (directives, rest): (Vec<&str>, Vec<&str>) = input
                        .lines()
                        .partition(|line| line.trim().starts_with('%'));
                    (directives.join("\n"), rest.join("\n"))
                }
            };
            format!("%VERSION: 1.0\n{header}\n---\n{body}")
        } else {
            format!("%VERSION: 1.0\n---\n{input}")
        };

        parse(hedl.as_bytes()).unwrap()
    }

    // ==================== Basic Query Tests ====================

    #[test]
    fn test_query_simple_field() {
        let doc = parse_hedl(r#"name: "Alice""#);

        let matches = query(&doc, "$.name", &QueryConfig::default()).unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].as_str(), Some("Alice"));
    }

    #[test]
    fn test_query_nested_field() {
        let doc = parse_hedl("user:\n  name: \"Bob\"\n  age: 25");

        let matches = query(&doc, "$.user.name", &QueryConfig::default()).unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].as_str(), Some("Bob"));
    }

    #[test]
    fn test_query_missing_field() {
        let doc = parse_hedl(r#"name: "Alice""#);

        let matches = query(&doc, "$.missing", &QueryConfig::default()).unwrap();
        assert!(matches.is_empty());
    }

    #[test]
    fn test_query_root() {
        let doc = parse_hedl(r#"name: "Alice""#);

        let matches = query(&doc, "$", &QueryConfig::default()).unwrap();
        assert_eq!(matches.len(), 1);
        assert!(matches[0].is_object());
    }

    #[test]
    fn test_query_wildcard() {
        let doc = parse_hedl("a: 1\nb: 2\nc: 3");

        let matches = query(&doc, "$.*", &QueryConfig::default()).unwrap();
        assert_eq!(matches.len(), 3);
    }

    // ==================== Query Helper Tests ====================

    #[test]
    fn test_query_first_success() {
        let doc = parse_hedl(r#"name: "Alice""#);

        let first = query_first(&doc, "$.name", &QueryConfig::default()).unwrap();
        assert_eq!(first.as_ref().and_then(JsonValue::as_str), Some("Alice"));
    }

    #[test]
    fn test_query_first_no_match() {
        let doc = parse_hedl(r#"name: "Alice""#);

        let first = query_first(&doc, "$.missing", &QueryConfig::default()).unwrap();
        assert!(first.is_none());
    }

    #[test]
    fn test_query_single_success() {
        let doc = parse_hedl(r#"name: "Alice""#);

        let value = query_single(&doc, "$.name", &QueryConfig::default()).unwrap();
        assert_eq!(value.as_str(), Some("Alice"));
    }

    #[test]
    fn test_query_single_no_results() {
        let doc = parse_hedl(r#"name: "Alice""#);

        let outcome = query_single(&doc, "$.missing", &QueryConfig::default());
        assert!(matches!(outcome, Err(QueryError::ExecutionError(_))));
    }

    #[test]
    fn test_query_single_multiple_results() {
        let doc = parse_hedl("a: 1\nb: 2");

        let outcome = query_single(&doc, "$.*", &QueryConfig::default());
        assert!(matches!(outcome, Err(QueryError::ExecutionError(_))));
    }

    #[test]
    fn test_query_exists_true() {
        let doc = parse_hedl(r#"name: "Alice""#);

        let found = query_exists(&doc, "$.name", &QueryConfig::default()).unwrap();
        assert!(found);
    }

    #[test]
    fn test_query_exists_false() {
        let doc = parse_hedl(r#"name: "Alice""#);

        let found = query_exists(&doc, "$.missing", &QueryConfig::default()).unwrap();
        assert!(!found);
    }

    #[test]
    fn test_query_count() {
        let doc = parse_hedl("a: 1\nb: 2\nc: 3");

        let total = query_count(&doc, "$.*", &QueryConfig::default()).unwrap();
        assert_eq!(total, 3);
    }

    #[test]
    fn test_query_count_zero() {
        let doc = parse_hedl(r#"name: "Alice""#);

        let total = query_count(&doc, "$.missing", &QueryConfig::default()).unwrap();
        assert_eq!(total, 0);
    }

    // ==================== Configuration Tests ====================

    #[test]
    fn test_config_builder() {
        let built = QueryConfigBuilder::new()
            .include_metadata(true)
            .flatten_lists(true)
            .include_children(false)
            .max_results(10)
            .build();

        assert!(built.include_metadata);
        assert!(built.flatten_lists);
        assert!(!built.include_children);
        assert_eq!(built.max_results, 10);
    }

    #[test]
    fn test_config_default() {
        let defaults = QueryConfig::default();

        assert!(!defaults.include_metadata);
        assert!(!defaults.flatten_lists);
        assert!(defaults.include_children);
        assert_eq!(defaults.max_results, 0);
    }

    #[test]
    fn test_config_max_results() {
        let doc = parse_hedl("a: 1\nb: 2\nc: 3\nd: 4");
        let capped = QueryConfigBuilder::new().max_results(2).build();

        let matches = query(&doc, "$.*", &capped).unwrap();
        assert_eq!(matches.len(), 2);
    }

    // ==================== Error Handling Tests ====================

    #[test]
    fn test_invalid_jsonpath_expression() {
        let doc = parse_hedl(r#"name: "Alice""#);

        let outcome = query(&doc, "$$invalid", &QueryConfig::default());
        assert!(matches!(outcome, Err(QueryError::InvalidExpression(_))));
    }

    #[test]
    fn test_error_display() {
        let rendered = QueryError::InvalidExpression("test error".to_string()).to_string();

        assert!(rendered.contains("Invalid JSONPath expression"));
        assert!(rendered.contains("test error"));
    }

    #[test]
    fn test_error_equality() {
        let left = QueryError::InvalidExpression("test".to_string());
        let right = QueryError::InvalidExpression("test".to_string());

        assert_eq!(left, right);
    }

    #[test]
    fn test_error_clone() {
        let original = QueryError::ConversionError("test".to_string());
        let duplicate = original.clone();

        assert_eq!(original, duplicate);
    }

    // ==================== Complex Query Tests ====================

    #[test]
    fn test_query_nested_objects() {
        let doc = parse_hedl("user:\n  profile:\n    name: \"Alice\"\n    age: 30");

        let matches = query(&doc, "$.user.profile.name", &QueryConfig::default()).unwrap();
        assert_eq!(matches[0].as_str(), Some("Alice"));
    }

    #[test]
    fn test_query_multiple_values() {
        let doc = parse_hedl("a: 1\nb: 2\nc: 3");

        let matches = query(&doc, "$.*", &QueryConfig::default()).unwrap();
        assert_eq!(matches.len(), 3);

        let total: i64 = matches.iter().filter_map(JsonValue::as_i64).sum();
        assert_eq!(total, 6);
    }

    #[test]
    fn test_query_with_numbers() {
        let doc = parse_hedl("count: 42\nprice: 19.99");
        let config = QueryConfig::default();

        let count = query_single(&doc, "$.count", &config).unwrap();
        let price = query_single(&doc, "$.price", &config).unwrap();

        assert_eq!(count.as_i64(), Some(42));
        assert_eq!(price.as_f64(), Some(19.99));
    }

    #[test]
    fn test_query_with_booleans() {
        let doc = parse_hedl("active: true\ndeleted: false");
        let config = QueryConfig::default();

        let active = query_single(&doc, "$.active", &config).unwrap();
        let deleted = query_single(&doc, "$.deleted", &config).unwrap();

        assert_eq!(active.as_bool(), Some(true));
        assert_eq!(deleted.as_bool(), Some(false));
    }

    #[test]
    fn test_query_with_null() {
        let doc = parse_hedl("value: ~");

        let value = query_single(&doc, "$.value", &QueryConfig::default()).unwrap();
        assert!(value.is_null());
    }

    // ==================== Edge Cases ====================

    #[test]
    fn test_query_empty_document() {
        let doc = parse_hedl("");

        let matches = query(&doc, "$", &QueryConfig::default()).unwrap();
        assert_eq!(matches.len(), 1);
        assert!(matches[0].is_object());
    }

    #[test]
    fn test_query_unicode_fields() {
        // HEDL field names must be ASCII identifiers, but values can be unicode
        let doc = parse_hedl(r#"name: "太郎""#);

        let matches = query(&doc, "$.name", &QueryConfig::default()).unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].as_str(), Some("太郎"));
    }

    #[test]
    fn test_query_with_special_characters() {
        // HEDL field names must be valid identifiers (no hyphens), so an
        // underscore is used here; bracket notation still works with it.
        let doc = parse_hedl(r#"field_name: "value""#);

        let matches = query(&doc, "$['field_name']", &QueryConfig::default()).unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].as_str(), Some("value"));
    }

    #[test]
    fn test_query_max_results_zero() {
        let doc = parse_hedl("a: 1\nb: 2\nc: 3");
        let unlimited = QueryConfigBuilder::new().max_results(0).build();

        let matches = query(&doc, "$.*", &unlimited).unwrap();
        assert_eq!(matches.len(), 3); // 0 means unlimited
    }

    // ==================== Integration Tests ====================

    #[test]
    fn test_query_builder_chain() {
        let built = QueryConfigBuilder::new()
            .include_metadata(true)
            .flatten_lists(false)
            .max_results(5)
            .include_children(true)
            .build();

        assert!(built.include_metadata);
        assert!(!built.flatten_lists);
        assert!(built.include_children);
        assert_eq!(built.max_results, 5);
    }

    #[test]
    fn test_config_to_json_config_conversion() {
        let query_config = QueryConfigBuilder::new()
            .include_metadata(true)
            .flatten_lists(true)
            .include_children(false)
            .build();

        let json_config = ToJsonConfig::from(&query_config);

        assert!(json_config.include_metadata);
        assert!(json_config.flatten_lists);
        assert!(!json_config.include_children);
    }
}
749        assert!(!json_config.include_children);
750    }
751}