Skip to main content

helios_persistence/composite/
analyzer.rs

1//! Query feature detection and analysis.
2//!
3//! This module provides query analysis to detect features that determine
4//! which backends should handle different parts of a query.
5//!
6//! # Feature Detection Rules
7//!
8//! The analyzer detects features based on query characteristics:
9//!
10//! | Feature | Detection |
11//! |---------|-----------|
12//! | ChainedSearch | Parameters with non-empty `chain` field |
13//! | ReverseChaining | `_has` parameter |
//! | FullTextSearch | `_text` or `_content` parameters, or the `:text` modifier on a token parameter |
15//! | TerminologySearch | Modifiers `:above`, `:below`, `:in`, `:not-in` |
16//! | Include | `_include` directives |
17//! | Revinclude | `_revinclude` directives |
18
19use std::collections::{HashMap, HashSet};
20
21use serde::{Deserialize, Serialize};
22
23use crate::core::BackendCapability;
24use crate::types::{IncludeType, SearchModifier, SearchParamType, SearchParameter, SearchQuery};
25
26/// Features detected in a search query.
27///
28/// These features are used to route queries to appropriate backends.
29#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
30#[serde(rename_all = "snake_case")]
31pub enum QueryFeature {
32    /// Basic search with simple parameters.
33    BasicSearch,
34
35    /// ID lookup (_id parameter).
36    IdLookup,
37
38    /// String parameter search.
39    StringSearch,
40
41    /// Token parameter search.
42    TokenSearch,
43
44    /// Date parameter search.
45    DateSearch,
46
47    /// Number parameter search.
48    NumberSearch,
49
50    /// Quantity parameter search.
51    QuantitySearch,
52
53    /// Reference parameter search.
54    ReferenceSearch,
55
56    /// URI parameter search.
57    UriSearch,
58
59    /// Composite parameter search.
60    CompositeSearch,
61
62    /// Chained parameter search (e.g., patient.name).
63    ChainedSearch,
64
65    /// Reverse chaining (_has parameter).
66    ReverseChaining,
67
68    /// Full-text search (_text, _content).
69    FullTextSearch,
70
71    /// Terminology expansion (:above, :below, :in, :not-in).
72    TerminologySearch,
73
74    /// _include directive.
75    Include,
76
77    /// _revinclude directive.
78    Revinclude,
79
80    /// Iterate include (_include:iterate).
81    IterateInclude,
82
83    /// Sorting (_sort parameter).
84    Sorting,
85
86    /// Cursor-based pagination.
87    CursorPagination,
88
89    /// Offset-based pagination.
90    OffsetPagination,
91
92    /// Total count requested.
93    TotalCount,
94
95    /// Summary mode requested.
96    Summary,
97}
98
99impl QueryFeature {
100    /// Returns the backend capability required for this feature.
101    pub fn required_capability(&self) -> Option<BackendCapability> {
102        match self {
103            QueryFeature::BasicSearch
104            | QueryFeature::IdLookup
105            | QueryFeature::StringSearch
106            | QueryFeature::TokenSearch
107            | QueryFeature::ReferenceSearch
108            | QueryFeature::UriSearch
109            | QueryFeature::CompositeSearch => Some(BackendCapability::BasicSearch),
110
111            QueryFeature::DateSearch => Some(BackendCapability::DateSearch),
112            QueryFeature::NumberSearch | QueryFeature::QuantitySearch => {
113                Some(BackendCapability::QuantitySearch)
114            }
115
116            QueryFeature::ChainedSearch => Some(BackendCapability::ChainedSearch),
117            QueryFeature::ReverseChaining => Some(BackendCapability::ReverseChaining),
118            QueryFeature::FullTextSearch => Some(BackendCapability::FullTextSearch),
119            QueryFeature::TerminologySearch => Some(BackendCapability::TerminologySearch),
120
121            QueryFeature::Include | QueryFeature::IterateInclude => {
122                Some(BackendCapability::Include)
123            }
124            QueryFeature::Revinclude => Some(BackendCapability::Revinclude),
125
126            QueryFeature::Sorting => Some(BackendCapability::Sorting),
127            QueryFeature::CursorPagination => Some(BackendCapability::CursorPagination),
128            QueryFeature::OffsetPagination => Some(BackendCapability::OffsetPagination),
129
130            QueryFeature::TotalCount | QueryFeature::Summary => None,
131        }
132    }
133
134    /// Returns true if this feature typically benefits from a specialized backend.
135    pub fn prefers_specialized_backend(&self) -> bool {
136        matches!(
137            self,
138            QueryFeature::ChainedSearch
139                | QueryFeature::ReverseChaining
140                | QueryFeature::FullTextSearch
141                | QueryFeature::TerminologySearch
142        )
143    }
144}
145
/// The kind of terminology operation requested through a search modifier.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum TerminologyOp {
    /// `:above` — match codes above the given code in the hierarchy.
    Above,
    /// `:below` — match codes below the given code in the hierarchy.
    Below,
    /// `:in` — match codes that are members of a value set.
    In,
    /// `:not-in` — match codes that are not members of a value set.
    NotIn,
}
158
159/// Result of analyzing a search query.
160#[derive(Debug, Clone)]
161pub struct QueryAnalysis {
162    /// All detected features.
163    pub features: HashSet<QueryFeature>,
164
165    /// Required backend capabilities.
166    pub required_capabilities: HashSet<BackendCapability>,
167
168    /// Estimated complexity score (1-10).
169    /// Higher scores indicate more complex queries.
170    pub complexity_score: u8,
171
172    /// Parameters grouped by feature.
173    pub feature_params: HashMap<QueryFeature, Vec<SearchParameter>>,
174
175    /// Whether the query can be split across multiple backends.
176    pub splittable: bool,
177
178    /// Features that benefit from specialized backends.
179    pub specialized_features: HashSet<QueryFeature>,
180
181    /// Detected terminology operations.
182    pub terminology_ops: Vec<(String, TerminologyOp)>,
183}
184
185impl QueryAnalysis {
186    /// Creates an empty analysis.
187    pub fn empty() -> Self {
188        Self {
189            features: HashSet::new(),
190            required_capabilities: HashSet::new(),
191            complexity_score: 1,
192            feature_params: HashMap::new(),
193            splittable: true,
194            specialized_features: HashSet::new(),
195            terminology_ops: Vec::new(),
196        }
197    }
198
199    /// Returns true if any advanced features are detected.
200    pub fn has_advanced_features(&self) -> bool {
201        self.features
202            .iter()
203            .any(|f| f.prefers_specialized_backend())
204    }
205
206    /// Returns true if the query uses chained parameters.
207    pub fn has_chaining(&self) -> bool {
208        self.features.contains(&QueryFeature::ChainedSearch)
209            || self.features.contains(&QueryFeature::ReverseChaining)
210    }
211
212    /// Returns true if the query uses full-text search.
213    pub fn has_fulltext(&self) -> bool {
214        self.features.contains(&QueryFeature::FullTextSearch)
215    }
216
217    /// Returns true if the query uses terminology operations.
218    pub fn has_terminology(&self) -> bool {
219        self.features.contains(&QueryFeature::TerminologySearch)
220    }
221
222    /// Returns true if the query uses includes.
223    pub fn has_includes(&self) -> bool {
224        self.features.contains(&QueryFeature::Include)
225            || self.features.contains(&QueryFeature::Revinclude)
226    }
227}
228
229/// Query analyzer that detects features in search queries.
230#[derive(Debug, Clone, Default)]
231pub struct QueryAnalyzer {
232    /// Custom feature patterns (for extensibility).
233    _custom_patterns: Vec<()>,
234}
235
236impl QueryAnalyzer {
237    /// Creates a new analyzer with default settings.
238    pub fn new() -> Self {
239        Self::default()
240    }
241
242    /// Analyzes a query and returns detected features.
243    pub fn analyze(&self, query: &SearchQuery) -> QueryAnalysis {
244        let mut analysis = QueryAnalysis::empty();
245
246        // Always add basic search if there are parameters
247        if !query.parameters.is_empty() || !query.includes.is_empty() {
248            analysis.features.insert(QueryFeature::BasicSearch);
249        }
250
251        // Analyze each parameter
252        for param in &query.parameters {
253            self.analyze_parameter(param, &mut analysis);
254        }
255
256        // Analyze reverse chains
257        for reverse_chain in &query.reverse_chains {
258            analysis.features.insert(QueryFeature::ReverseChaining);
259            analysis
260                .specialized_features
261                .insert(QueryFeature::ReverseChaining);
262
263            // Add to feature params with synthetic parameter
264            // We don't populate chain since that's for forward chaining
265            analysis
266                .feature_params
267                .entry(QueryFeature::ReverseChaining)
268                .or_default()
269                .push(SearchParameter {
270                    name: format!(
271                        "_has:{}:{}:{}",
272                        reverse_chain.source_type,
273                        reverse_chain.reference_param,
274                        reverse_chain.search_param
275                    ),
276                    param_type: SearchParamType::Special,
277                    modifier: None,
278                    values: reverse_chain.value.clone().into_iter().collect(),
279                    chain: vec![],
280                    components: vec![],
281                });
282        }
283
284        // Analyze includes
285        for include in &query.includes {
286            match include.include_type {
287                IncludeType::Include => {
288                    if include.iterate {
289                        analysis.features.insert(QueryFeature::IterateInclude);
290                    } else {
291                        analysis.features.insert(QueryFeature::Include);
292                    }
293                }
294                IncludeType::Revinclude => {
295                    analysis.features.insert(QueryFeature::Revinclude);
296                }
297            }
298        }
299
300        // Analyze sorting
301        if !query.sort.is_empty() {
302            analysis.features.insert(QueryFeature::Sorting);
303        }
304
305        // Analyze pagination
306        if query.cursor.is_some() {
307            analysis.features.insert(QueryFeature::CursorPagination);
308        } else if query.offset.is_some() {
309            analysis.features.insert(QueryFeature::OffsetPagination);
310        }
311
312        // Analyze total count
313        if query.total.is_some() {
314            analysis.features.insert(QueryFeature::TotalCount);
315        }
316
317        // Analyze summary
318        if query.summary.is_some() {
319            analysis.features.insert(QueryFeature::Summary);
320        }
321
322        // Calculate required capabilities
323        for feature in &analysis.features {
324            if let Some(cap) = feature.required_capability() {
325                analysis.required_capabilities.insert(cap);
326            }
327        }
328
329        // Calculate complexity score
330        analysis.complexity_score = self.calculate_complexity(&analysis);
331
332        // Determine if splittable
333        analysis.splittable = self.is_splittable(&analysis);
334
335        analysis
336    }
337
338    /// Analyzes a single parameter.
339    fn analyze_parameter(&self, param: &SearchParameter, analysis: &mut QueryAnalysis) {
340        // Check for ID lookup
341        if param.name == "_id" {
342            analysis.features.insert(QueryFeature::IdLookup);
343            return;
344        }
345
346        // Check for full-text search
347        if param.name == "_text" || param.name == "_content" {
348            analysis.features.insert(QueryFeature::FullTextSearch);
349            analysis
350                .specialized_features
351                .insert(QueryFeature::FullTextSearch);
352            analysis
353                .feature_params
354                .entry(QueryFeature::FullTextSearch)
355                .or_default()
356                .push(param.clone());
357            return;
358        }
359
360        // Check for chained search
361        if !param.chain.is_empty() {
362            analysis.features.insert(QueryFeature::ChainedSearch);
363            analysis
364                .specialized_features
365                .insert(QueryFeature::ChainedSearch);
366            analysis
367                .feature_params
368                .entry(QueryFeature::ChainedSearch)
369                .or_default()
370                .push(param.clone());
371        }
372
373        // Check for terminology modifiers
374        if let Some(ref modifier) = param.modifier {
375            if let Some(term_op) = self.parse_terminology_modifier(modifier) {
376                analysis.features.insert(QueryFeature::TerminologySearch);
377                analysis
378                    .specialized_features
379                    .insert(QueryFeature::TerminologySearch);
380                analysis.terminology_ops.push((param.name.clone(), term_op));
381                analysis
382                    .feature_params
383                    .entry(QueryFeature::TerminologySearch)
384                    .or_default()
385                    .push(param.clone());
386            }
387        }
388
389        // Check for text modifier (for token parameters)
390        if let Some(SearchModifier::Text) = param.modifier {
391            if param.param_type == SearchParamType::Token {
392                analysis.features.insert(QueryFeature::FullTextSearch);
393            }
394        }
395
396        // Detect parameter type features
397        let type_feature = match param.param_type {
398            SearchParamType::String => QueryFeature::StringSearch,
399            SearchParamType::Token => QueryFeature::TokenSearch,
400            SearchParamType::Date => QueryFeature::DateSearch,
401            SearchParamType::Number => QueryFeature::NumberSearch,
402            SearchParamType::Quantity => QueryFeature::QuantitySearch,
403            SearchParamType::Reference => QueryFeature::ReferenceSearch,
404            SearchParamType::Uri => QueryFeature::UriSearch,
405            SearchParamType::Composite => QueryFeature::CompositeSearch,
406            SearchParamType::Special => QueryFeature::BasicSearch,
407        };
408        analysis.features.insert(type_feature);
409
410        // Add to feature params for basic types (if not already categorized)
411        if param.chain.is_empty()
412            && !analysis
413                .specialized_features
414                .contains(&QueryFeature::TerminologySearch)
415        {
416            analysis
417                .feature_params
418                .entry(QueryFeature::BasicSearch)
419                .or_default()
420                .push(param.clone());
421        }
422    }
423
424    /// Parses a terminology modifier.
425    fn parse_terminology_modifier(&self, modifier: &SearchModifier) -> Option<TerminologyOp> {
426        match modifier {
427            SearchModifier::Above => Some(TerminologyOp::Above),
428            SearchModifier::Below => Some(TerminologyOp::Below),
429            SearchModifier::In => Some(TerminologyOp::In),
430            SearchModifier::NotIn => Some(TerminologyOp::NotIn),
431            _ => None,
432        }
433    }
434
435    /// Calculates a complexity score (1-10).
436    fn calculate_complexity(&self, analysis: &QueryAnalysis) -> u8 {
437        let mut score = 1u8;
438
439        // Add for advanced features
440        if analysis.has_chaining() {
441            score = score.saturating_add(2);
442        }
443        if analysis.has_fulltext() {
444            score = score.saturating_add(1);
445        }
446        if analysis.has_terminology() {
447            score = score.saturating_add(2);
448        }
449        if analysis.has_includes() {
450            score = score.saturating_add(1);
451        }
452
453        // Add for number of features
454        let feature_count = analysis.features.len();
455        if feature_count > 5 {
456            score = score.saturating_add(1);
457        }
458        if feature_count > 8 {
459            score = score.saturating_add(1);
460        }
461
462        // Add for reverse chaining depth
463        if analysis.features.contains(&QueryFeature::ReverseChaining) {
464            score = score.saturating_add(1);
465        }
466
467        // Cap at 10
468        score.min(10)
469    }
470
471    /// Determines if the query can be split across backends.
472    fn is_splittable(&self, _analysis: &QueryAnalysis) -> bool {
473        // Queries are splittable unless they have tight coupling
474        // between parameters that must be evaluated together
475
476        // For now, most queries are splittable
477        // This can be refined based on specific query patterns
478        true
479    }
480
481    /// Returns features for a specific parameter.
482    pub fn features_for_param(&self, param: &SearchParameter) -> HashSet<QueryFeature> {
483        let mut analysis = QueryAnalysis::empty();
484        self.analyze_parameter(param, &mut analysis);
485        analysis.features
486    }
487}
488
489/// Convert query features to backend capabilities.
490pub fn features_to_capabilities(features: &HashSet<QueryFeature>) -> HashSet<BackendCapability> {
491    features
492        .iter()
493        .filter_map(|f| f.required_capability())
494        .collect()
495}
496
497/// Detects features from a query (convenience function).
498pub fn detect_query_features(query: &SearchQuery) -> HashSet<QueryFeature> {
499    QueryAnalyzer::new().analyze(query).features
500}
501
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{
        ChainedParameter, IncludeDirective, SearchModifier, SearchValue, SortDirective,
    };

    /// Builds an unchained, single-valued parameter.
    fn param(
        name: &str,
        param_type: SearchParamType,
        modifier: Option<SearchModifier>,
        value: SearchValue,
    ) -> SearchParameter {
        SearchParameter {
            name: name.to_string(),
            param_type,
            modifier,
            values: vec![value],
            chain: Vec::new(),
            components: Vec::new(),
        }
    }

    /// `name=Smith` as a plain string parameter.
    fn name_smith() -> SearchParameter {
        param(
            "name",
            SearchParamType::String,
            None,
            SearchValue::string("Smith"),
        )
    }

    /// `name=Smith` chained through `subject:Patient`.
    fn chained_name_smith() -> SearchParameter {
        let mut chained = name_smith();
        chained.chain = vec![ChainedParameter {
            reference_param: "subject".to_string(),
            target_type: Some("Patient".to_string()),
            target_param: "name".to_string(),
        }];
        chained
    }

    /// `_text=cardiac`.
    fn text_cardiac() -> SearchParameter {
        param(
            "_text",
            SearchParamType::String,
            None,
            SearchValue::string("cardiac"),
        )
    }

    /// `code:<modifier>=http://loinc.org|8867-4`.
    fn loinc_code(modifier: SearchModifier) -> SearchParameter {
        param(
            "code",
            SearchParamType::Token,
            Some(modifier),
            SearchValue::token(Some("http://loinc.org"), "8867-4"),
        )
    }

    /// A non-iterating include directive targeting `Patient`.
    fn patient_include(
        include_type: IncludeType,
        source_type: &str,
        search_param: &str,
    ) -> IncludeDirective {
        IncludeDirective {
            include_type,
            source_type: source_type.to_string(),
            search_param: search_param.to_string(),
            target_type: Some("Patient".to_string()),
            iterate: false,
        }
    }

    #[test]
    fn test_detect_basic_search() {
        let query = SearchQuery::new("Patient").with_parameter(name_smith());

        let detected = detect_query_features(&query);
        assert!(detected.contains(&QueryFeature::BasicSearch));
        assert!(detected.contains(&QueryFeature::StringSearch));
    }

    #[test]
    fn test_detect_chained_search() {
        let query = SearchQuery::new("Observation").with_parameter(chained_name_smith());

        let detected = detect_query_features(&query);
        assert!(detected.contains(&QueryFeature::ChainedSearch));
    }

    #[test]
    fn test_detect_fulltext_search() {
        let query = SearchQuery::new("Patient").with_parameter(text_cardiac());

        let detected = detect_query_features(&query);
        assert!(detected.contains(&QueryFeature::FullTextSearch));
    }

    #[test]
    fn test_detect_terminology_search() {
        let query =
            SearchQuery::new("Observation").with_parameter(loinc_code(SearchModifier::Below));

        let detected = detect_query_features(&query);
        assert!(detected.contains(&QueryFeature::TerminologySearch));
    }

    #[test]
    fn test_detect_terminology_search_above() {
        let query =
            SearchQuery::new("Observation").with_parameter(loinc_code(SearchModifier::Above));

        let detected = detect_query_features(&query);
        assert!(detected.contains(&QueryFeature::TerminologySearch));
    }

    #[test]
    fn test_detect_include() {
        let query = SearchQuery::new("Observation").with_include(patient_include(
            IncludeType::Include,
            "Observation",
            "patient",
        ));

        let detected = detect_query_features(&query);
        assert!(detected.contains(&QueryFeature::Include));
    }

    #[test]
    fn test_detect_revinclude() {
        let query = SearchQuery::new("Patient").with_include(patient_include(
            IncludeType::Revinclude,
            "Observation",
            "subject",
        ));

        let detected = detect_query_features(&query);
        assert!(detected.contains(&QueryFeature::Revinclude));
    }

    #[test]
    fn test_detect_sorting() {
        let query = SearchQuery::new("Patient").with_sort(SortDirective::parse("-_lastUpdated"));

        let detected = detect_query_features(&query);
        assert!(detected.contains(&QueryFeature::Sorting));
    }

    #[test]
    fn test_complexity_score() {
        let analyzer = QueryAnalyzer::new();

        // A lone `_id` lookup should score low.
        let simple = SearchQuery::new("Patient").with_parameter(param(
            "_id",
            SearchParamType::Token,
            None,
            SearchValue::eq("123"),
        ));
        let simple_analysis = analyzer.analyze(&simple);
        assert!(simple_analysis.complexity_score <= 3);

        // Chaining + full-text + terminology together should score high.
        let complex = SearchQuery::new("Observation")
            .with_parameter(chained_name_smith())
            .with_parameter(text_cardiac())
            .with_parameter(loinc_code(SearchModifier::Below));
        let complex_analysis = analyzer.analyze(&complex);
        assert!(
            complex_analysis.complexity_score >= 5,
            "Expected complexity >= 5, got {}",
            complex_analysis.complexity_score
        );
    }

    #[test]
    fn test_features_to_capabilities() {
        let features = HashSet::from([
            QueryFeature::BasicSearch,
            QueryFeature::ChainedSearch,
            QueryFeature::FullTextSearch,
        ]);

        let capabilities = features_to_capabilities(&features);
        assert!(capabilities.contains(&BackendCapability::BasicSearch));
        assert!(capabilities.contains(&BackendCapability::ChainedSearch));
        assert!(capabilities.contains(&BackendCapability::FullTextSearch));
    }
}