// helios_persistence/search/extractor.rs

//! SearchParameter Value Extractor.
//!
//! Uses FHIRPath expressions to extract searchable values from FHIR resources.
4
5use std::collections::HashMap;
6use std::sync::Arc;
7
8use helios_fhirpath::EvaluationContext;
9use helios_fhirpath_support::EvaluationResult;
10use parking_lot::RwLock;
11use rust_decimal::Decimal;
12use serde::{Deserialize, Serialize};
13use serde_json::Value;
14
15use crate::types::SearchParamType;
16
17use super::converters::{IndexValue, ValueConverter};
18use super::errors::ExtractionError;
19use super::registry::{SearchParameterDefinition, SearchParameterRegistry};
20
21/// A value extracted from a resource for indexing.
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct ExtractedValue {
24    /// The parameter name (e.g., "name", "identifier").
25    pub param_name: String,
26
27    /// The parameter URL.
28    pub param_url: String,
29
30    /// The parameter type.
31    pub param_type: SearchParamType,
32
33    /// The extracted and converted value.
34    pub value: IndexValue,
35
36    /// Composite group ID (for composite parameters).
37    /// Values with the same group ID are part of the same composite match.
38    pub composite_group: Option<u32>,
39}
40
41impl ExtractedValue {
42    /// Creates a new extracted value.
43    pub fn new(
44        param_name: impl Into<String>,
45        param_url: impl Into<String>,
46        param_type: SearchParamType,
47        value: IndexValue,
48    ) -> Self {
49        Self {
50            param_name: param_name.into(),
51            param_url: param_url.into(),
52            param_type,
53            value,
54            composite_group: None,
55        }
56    }
57
58    /// Sets the composite group ID.
59    pub fn with_composite_group(mut self, group: u32) -> Self {
60        self.composite_group = Some(group);
61        self
62    }
63}
64
65/// Extracts searchable values from FHIR resources using FHIRPath.
66pub struct SearchParameterExtractor {
67    registry: Arc<RwLock<SearchParameterRegistry>>,
68}
69
70impl SearchParameterExtractor {
71    /// Creates a new extractor with the given registry.
72    pub fn new(registry: Arc<RwLock<SearchParameterRegistry>>) -> Self {
73        Self { registry }
74    }
75
76    /// Extracts all searchable values from a resource.
77    ///
78    /// Returns values for all active search parameters that apply to this resource type.
79    pub fn extract(
80        &self,
81        resource: &Value,
82        resource_type: &str,
83    ) -> Result<Vec<ExtractedValue>, ExtractionError> {
84        // Validate resource
85        let obj = resource
86            .as_object()
87            .ok_or_else(|| ExtractionError::InvalidResource {
88                message: "Resource must be a JSON object".to_string(),
89            })?;
90
91        // Verify resource type
92        if let Some(rt) = obj.get("resourceType").and_then(|v| v.as_str()) {
93            if rt != resource_type {
94                return Err(ExtractionError::InvalidResource {
95                    message: format!(
96                        "Resource type mismatch: expected {}, got {}",
97                        resource_type, rt
98                    ),
99                });
100            }
101        }
102
103        let mut results = Vec::new();
104
105        // Get active parameters for this resource type
106        let params = {
107            let registry = self.registry.read();
108            registry.get_active_params(resource_type)
109        };
110
111        for param in &params {
112            match self.extract_for_param(resource, param) {
113                Ok(values) => results.extend(values),
114                Err(e) => {
115                    // Log the error but continue with other parameters
116                    tracing::warn!(
117                        "Failed to extract values for parameter '{}': {}",
118                        param.code,
119                        e
120                    );
121                }
122            }
123        }
124
125        // Also extract common Resource-level parameters
126        let common_params = {
127            let registry = self.registry.read();
128            registry.get_active_params("Resource")
129        };
130
131        for param in &common_params {
132            if !params.iter().any(|p| p.code == param.code) {
133                match self.extract_for_param(resource, param) {
134                    Ok(values) => results.extend(values),
135                    Err(e) => {
136                        tracing::warn!(
137                            "Failed to extract values for common parameter '{}': {}",
138                            param.code,
139                            e
140                        );
141                    }
142                }
143            }
144        }
145
146        Ok(results)
147    }
148
149    /// Extracts values for a specific parameter from a resource.
150    pub fn extract_for_param(
151        &self,
152        resource: &Value,
153        param: &SearchParameterDefinition,
154    ) -> Result<Vec<ExtractedValue>, ExtractionError> {
155        if param.expression.is_empty() {
156            return Ok(Vec::new());
157        }
158
159        // Get the resource type from the resource
160        let resource_type = resource
161            .get("resourceType")
162            .and_then(|v| v.as_str())
163            .unwrap_or("");
164
165        // Filter the expression to only include parts relevant to this resource type
166        let filtered_expr = self.filter_expression_for_resource(&param.expression, resource_type);
167
168        if filtered_expr.is_empty() {
169            return Ok(Vec::new());
170        }
171
172        // Evaluate the filtered FHIRPath expression using the actual evaluator
173        let values = self.evaluate_fhirpath(resource, &filtered_expr)?;
174
175        let mut results = Vec::new();
176        for value in values {
177            let converted = ValueConverter::convert(&value, param.param_type, &param.code)?;
178            for idx_value in converted {
179                results.push(ExtractedValue::new(
180                    &param.code,
181                    &param.url,
182                    param.param_type,
183                    idx_value,
184                ));
185            }
186        }
187
188        Ok(results)
189    }
190
191    /// Filters a FHIRPath expression to only include parts relevant to a specific resource type.
192    ///
193    /// Many FHIR SearchParameters have expressions that span multiple resource types, joined
194    /// with `|` (union). For example, the `patient` parameter has:
195    /// `AllergyIntolerance.patient | CarePlan.subject.where(resolve() is Patient) | ...`
196    ///
197    /// This method extracts only the parts that start with the given resource type and
198    /// simplifies common patterns that use `resolve()`.
199    fn filter_expression_for_resource(&self, expression: &str, resource_type: &str) -> String {
200        // Split by | and filter to parts starting with our resource type
201        let parts: Vec<String> = expression
202            .split('|')
203            .map(|p| p.trim())
204            .filter(|p| {
205                // Check if this part starts with our resource type
206                p.starts_with(resource_type)
207                    && (p.len() == resource_type.len()
208                        || p.chars().nth(resource_type.len()) == Some('.'))
209            })
210            .map(|p| self.simplify_resolve_pattern(p))
211            .collect();
212
213        if parts.is_empty() {
214            // If no parts match, return the original expression
215            // This handles expressions that don't use ResourceType prefix
216            expression.to_string()
217        } else {
218            // Join the filtered parts back with |
219            parts.join(" | ")
220        }
221    }
222
223    /// Simplifies common `.where(resolve() is ResourceType)` patterns.
224    ///
225    /// In FHIR SearchParameters, patterns like `subject.where(resolve() is Patient)`
226    /// are used to filter references by target type. Since we're extracting references
227    /// for indexing (not actually resolving them), we can safely strip this pattern
228    /// and just extract the reference value.
229    fn simplify_resolve_pattern(&self, expr: &str) -> String {
230        // Pattern: .where(resolve() is SomeType)
231        // We want to remove this suffix since we just need the reference value
232        if let Some(where_pos) = expr.find(".where(resolve()") {
233            // Find the matching closing paren
234            let after_where = &expr[where_pos..];
235            if after_where.rfind(')').is_some() {
236                // Return everything before .where(...)
237                return expr[..where_pos].to_string();
238            }
239        }
240        expr.to_string()
241    }
242
243    /// Evaluates a FHIRPath expression against a resource using the helios-fhirpath evaluator.
244    fn evaluate_fhirpath(
245        &self,
246        resource: &Value,
247        expression: &str,
248    ) -> Result<Vec<Value>, ExtractionError> {
249        // Convert JSON to EvaluationResult and set up context
250        let eval_result = json_to_evaluation_result(resource)?;
251
252        // Create evaluation context with the resource as 'this'
253        let mut context = EvaluationContext::new_empty_with_default_version();
254        context.set_this(eval_result);
255
256        // Evaluate the FHIRPath expression
257        let result = helios_fhirpath::evaluate_expression(expression, &context).map_err(|e| {
258            ExtractionError::FhirPathError {
259                expression: expression.to_string(),
260                message: e,
261            }
262        })?;
263
264        // Convert EvaluationResult back to JSON values
265        evaluation_result_to_json_values(&result)
266    }
267}
268
269/// Converts a serde_json::Value to an EvaluationResult.
270fn json_to_evaluation_result(value: &Value) -> Result<EvaluationResult, ExtractionError> {
271    match value {
272        Value::Null => Ok(EvaluationResult::Empty),
273        Value::Bool(b) => Ok(EvaluationResult::boolean(*b)),
274        Value::Number(n) => {
275            if let Some(i) = n.as_i64() {
276                Ok(EvaluationResult::integer(i))
277            } else if let Some(f) = n.as_f64() {
278                Ok(EvaluationResult::decimal(Decimal::try_from(f).map_err(
279                    |e| ExtractionError::ConversionError {
280                        message: format!("Invalid decimal: {}", e),
281                    },
282                )?))
283            } else {
284                Err(ExtractionError::ConversionError {
285                    message: "Invalid number".to_string(),
286                })
287            }
288        }
289        Value::String(s) => Ok(EvaluationResult::string(s.clone())),
290        Value::Array(arr) => {
291            let results: Result<Vec<_>, _> = arr.iter().map(json_to_evaluation_result).collect();
292            Ok(EvaluationResult::collection(results?))
293        }
294        Value::Object(obj) => {
295            let mut map = HashMap::new();
296            for (key, val) in obj {
297                let eval_val = json_to_evaluation_result(val)?;
298                map.insert(key.clone(), eval_val);
299            }
300            Ok(EvaluationResult::Object {
301                map,
302                type_info: None,
303            })
304        }
305    }
306}
307
308/// Converts an EvaluationResult back to JSON values for the converter.
309fn evaluation_result_to_json_values(
310    result: &EvaluationResult,
311) -> Result<Vec<Value>, ExtractionError> {
312    match result {
313        EvaluationResult::Empty => Ok(Vec::new()),
314        EvaluationResult::Boolean(b, _) => Ok(vec![Value::Bool(*b)]),
315        EvaluationResult::String(s, _) => Ok(vec![Value::String(s.clone())]),
316        EvaluationResult::Integer(i, _) => Ok(vec![Value::Number((*i).into())]),
317        EvaluationResult::Integer64(i, _) => Ok(vec![Value::Number((*i).into())]),
318        EvaluationResult::Decimal(d, _) => {
319            // Convert decimal to JSON number
320            let f: f64 = (*d).try_into().unwrap_or(0.0);
321            Ok(vec![Value::Number(
322                serde_json::Number::from_f64(f).unwrap_or_else(|| serde_json::Number::from(0)),
323            )])
324        }
325        EvaluationResult::Date(s, _) => Ok(vec![Value::String(s.clone())]),
326        EvaluationResult::DateTime(s, _) => Ok(vec![Value::String(s.clone())]),
327        EvaluationResult::Time(s, _) => Ok(vec![Value::String(s.clone())]),
328        EvaluationResult::Quantity(value, unit, _) => {
329            // Convert Quantity to JSON object
330            let f: f64 = (*value).try_into().unwrap_or(0.0);
331            Ok(vec![serde_json::json!({
332                "value": f,
333                "unit": unit
334            })])
335        }
336        EvaluationResult::Collection { items, .. } => {
337            let mut values = Vec::new();
338            for item in items {
339                values.extend(evaluation_result_to_json_values(item)?);
340            }
341            Ok(values)
342        }
343        EvaluationResult::Object { map, .. } => {
344            // Convert object back to JSON
345            let mut obj = serde_json::Map::new();
346            for (key, val) in map {
347                let json_vals = evaluation_result_to_json_values(val)?;
348                // Check if the original value was a Collection - if so, preserve it as an array
349                // even if it has only one element, since FHIR arrays should stay as arrays
350                let is_collection = matches!(val, EvaluationResult::Collection { .. });
351                if is_collection {
352                    // Always preserve arrays as arrays
353                    obj.insert(key.clone(), Value::Array(json_vals));
354                } else if json_vals.len() == 1 {
355                    obj.insert(key.clone(), json_vals.into_iter().next().unwrap());
356                } else if !json_vals.is_empty() {
357                    obj.insert(key.clone(), Value::Array(json_vals));
358                }
359            }
360            Ok(vec![Value::Object(obj)])
361        }
362    }
363}
364
365impl std::fmt::Debug for SearchParameterExtractor {
366    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
367        f.debug_struct("SearchParameterExtractor").finish()
368    }
369}
370
371#[cfg(test)]
372mod tests {
373    use super::*;
374    use crate::search::loader::SearchParameterLoader;
375    use helios_fhir::FhirVersion;
376    use serde_json::json;
377    use std::path::PathBuf;
378
379    fn create_test_extractor() -> SearchParameterExtractor {
380        let loader = SearchParameterLoader::new(FhirVersion::R4);
381        let mut registry = SearchParameterRegistry::new();
382
383        // Load minimal fallback
384        if let Ok(params) = loader.load_embedded() {
385            for param in params {
386                let _ = registry.register(param);
387            }
388        }
389
390        // Load spec file for full parameter support
391        // CARGO_MANIFEST_DIR for this crate is crates/persistence
392        // We need to go up two levels to reach the workspace root
393        let data_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
394            .parent()
395            .and_then(|p| p.parent())
396            .map(|p| p.join("data"))
397            .unwrap_or_else(|| PathBuf::from("data"));
398
399        if let Ok(params) = loader.load_from_spec_file(&data_dir) {
400            for param in params {
401                let _ = registry.register(param);
402            }
403        }
404
405        SearchParameterExtractor::new(Arc::new(RwLock::new(registry)))
406    }
407
408    #[test]
409    fn test_extract_patient_name() {
410        let extractor = create_test_extractor();
411
412        let patient = json!({
413            "resourceType": "Patient",
414            "id": "123",
415            "name": [
416                {
417                    "family": "Smith",
418                    "given": ["John", "James"]
419                }
420            ]
421        });
422
423        let values = extractor.extract(&patient, "Patient").unwrap();
424
425        // Should have extracted name values
426        let name_values: Vec<_> = values.iter().filter(|v| v.param_name == "name").collect();
427        assert!(!name_values.is_empty(), "Should extract 'name' values");
428
429        // Should have extracted family
430        let family_values: Vec<_> = values.iter().filter(|v| v.param_name == "family").collect();
431        assert!(!family_values.is_empty(), "Should extract 'family' values");
432    }
433
434    #[test]
435    fn test_extract_patient_identifier() {
436        let extractor = create_test_extractor();
437
438        let patient = json!({
439            "resourceType": "Patient",
440            "id": "123",
441            "identifier": [
442                {
443                    "system": "http://hospital.org/mrn",
444                    "value": "12345"
445                }
446            ]
447        });
448
449        let values = extractor.extract(&patient, "Patient").unwrap();
450
451        let id_values: Vec<_> = values
452            .iter()
453            .filter(|v| v.param_name == "identifier")
454            .collect();
455        assert!(!id_values.is_empty(), "Should extract 'identifier' values");
456
457        if let IndexValue::Token { system, code, .. } = &id_values[0].value {
458            assert_eq!(system.as_ref().unwrap(), "http://hospital.org/mrn");
459            assert_eq!(code, "12345");
460        }
461    }
462
463    #[test]
464    fn test_extract_observation_values() {
465        let extractor = create_test_extractor();
466
467        let observation = json!({
468            "resourceType": "Observation",
469            "id": "obs1",
470            "code": {
471                "coding": [
472                    {
473                        "system": "http://loinc.org",
474                        "code": "8867-4"
475                    }
476                ]
477            },
478            "subject": {
479                "reference": "Patient/123"
480            },
481            "valueQuantity": {
482                "value": 120.5,
483                "unit": "mmHg"
484            }
485        });
486
487        let values = extractor.extract(&observation, "Observation").unwrap();
488
489        // Should have code
490        let code_values: Vec<_> = values.iter().filter(|v| v.param_name == "code").collect();
491        assert!(!code_values.is_empty(), "Should extract 'code' values");
492
493        // Should have subject
494        let subject_values: Vec<_> = values
495            .iter()
496            .filter(|v| v.param_name == "subject")
497            .collect();
498        assert!(
499            !subject_values.is_empty(),
500            "Should extract 'subject' values"
501        );
502    }
503
504    #[test]
505    fn test_invalid_resource() {
506        let extractor = create_test_extractor();
507
508        let not_object = json!("string");
509        let result = extractor.extract(&not_object, "Patient");
510        assert!(result.is_err());
511    }
512
513    #[test]
514    fn test_resource_type_mismatch() {
515        let extractor = create_test_extractor();
516
517        let patient = json!({
518            "resourceType": "Patient",
519            "id": "123"
520        });
521
522        let result = extractor.extract(&patient, "Observation");
523        assert!(result.is_err());
524    }
525
526    #[test]
527    fn test_fhirpath_with_where_clause() {
528        let extractor = create_test_extractor();
529
530        // Test a patient with multiple names - FHIRPath should be able to filter
531        let patient = json!({
532            "resourceType": "Patient",
533            "id": "123",
534            "name": [
535                {
536                    "use": "official",
537                    "family": "Smith",
538                    "given": ["John"]
539                },
540                {
541                    "use": "nickname",
542                    "given": ["Johnny"]
543                }
544            ]
545        });
546
547        let values = extractor.extract(&patient, "Patient").unwrap();
548
549        // Should extract all names (both official and nickname)
550        let name_values: Vec<_> = values.iter().filter(|v| v.param_name == "name").collect();
551        assert!(
552            name_values.len() >= 2,
553            "Should extract multiple name values"
554        );
555    }
556
557    #[test]
558    fn test_extract_observation_code_with_display() {
559        let extractor = create_test_extractor();
560
561        let observation = json!({
562            "resourceType": "Observation",
563            "id": "obs1",
564            "status": "final",
565            "code": {
566                "coding": [
567                    {
568                        "system": "http://loinc.org",
569                        "code": "8867-4",
570                        "display": "Heart rate"
571                    }
572                ]
573            }
574        });
575
576        // Extract values
577        let values = extractor.extract(&observation, "Observation").unwrap();
578
579        // Should have extracted code values
580        let code_values: Vec<_> = values.iter().filter(|v| v.param_name == "code").collect();
581        assert!(!code_values.is_empty(), "Should extract 'code' values");
582
583        // Check that display is populated
584        if let Some(first_code) = code_values.first() {
585            if let IndexValue::Token { display, .. } = &first_code.value {
586                assert_eq!(
587                    display.as_deref(),
588                    Some("Heart rate"),
589                    "Display should be populated"
590                );
591            }
592        }
593    }
594
595    #[test]
596    fn test_extract_resource_id() {
597        let extractor = create_test_extractor();
598
599        let patient = json!({
600            "resourceType": "Patient",
601            "id": "p1"
602        });
603
604        let values = extractor.extract(&patient, "Patient").unwrap();
605
606        // Should have extracted _id
607        let id_values: Vec<_> = values.iter().filter(|v| v.param_name == "_id").collect();
608        assert!(!id_values.is_empty(), "Should extract '_id' parameter");
609
610        // Check the value
611        if let Some(first_id) = id_values.first() {
612            if let IndexValue::Token { code, .. } = &first_id.value {
613                assert_eq!(code, "p1", "_id should be 'p1'");
614            }
615        }
616    }
617
618    #[test]
619    fn test_json_to_evaluation_result() {
620        // Test basic types
621        assert!(matches!(
622            json_to_evaluation_result(&json!(null)).unwrap(),
623            EvaluationResult::Empty
624        ));
625
626        assert!(matches!(
627            json_to_evaluation_result(&json!(true)).unwrap(),
628            EvaluationResult::Boolean(true, _)
629        ));
630
631        assert!(matches!(
632            json_to_evaluation_result(&json!("test")).unwrap(),
633            EvaluationResult::String(s, _) if s == "test"
634        ));
635
636        assert!(matches!(
637            json_to_evaluation_result(&json!(42)).unwrap(),
638            EvaluationResult::Integer(42, _)
639        ));
640
641        // Test array
642        if let EvaluationResult::Collection { items, .. } =
643            json_to_evaluation_result(&json!([1, 2, 3])).unwrap()
644        {
645            assert_eq!(items.len(), 3);
646        } else {
647            panic!("Expected collection");
648        }
649
650        // Test object
651        if let EvaluationResult::Object { map, .. } =
652            json_to_evaluation_result(&json!({"key": "value"})).unwrap()
653        {
654            assert!(map.contains_key("key"));
655        } else {
656            panic!("Expected object");
657        }
658    }
659
660    #[test]
661    fn test_filter_expression_for_resource() {
662        let extractor = create_test_extractor();
663
664        // Test multi-resource expression (like patient search param)
665        let complex_expr =
666            "AllergyIntolerance.patient | Immunization.patient | Observation.subject";
667        let filtered = extractor.filter_expression_for_resource(complex_expr, "Immunization");
668        assert_eq!(filtered, "Immunization.patient");
669
670        // Test with no matching parts - should return original
671        let no_match = extractor.filter_expression_for_resource(complex_expr, "Patient");
672        assert_eq!(no_match, complex_expr);
673
674        // Test simple expression (single resource type)
675        let simple_expr = "Patient.name";
676        let simple_filtered = extractor.filter_expression_for_resource(simple_expr, "Patient");
677        assert_eq!(simple_filtered, "Patient.name");
678
679        // Test that partial matches don't count (Observation shouldn't match Obs)
680        let partial = extractor.filter_expression_for_resource("Observation.code", "Obs");
681        assert_eq!(partial, "Observation.code");
682
683        // Test stripping .where(resolve() is X) pattern
684        let with_resolve = "Observation.subject.where(resolve() is Patient) | Patient.link.other";
685        let stripped = extractor.filter_expression_for_resource(with_resolve, "Observation");
686        assert_eq!(stripped, "Observation.subject");
687
688        // Test real-world patient search param pattern
689        let patient_expr = "CarePlan.subject.where(resolve() is Patient) | Observation.subject.where(resolve() is Patient)";
690        let careplan_filtered = extractor.filter_expression_for_resource(patient_expr, "CarePlan");
691        assert_eq!(careplan_filtered, "CarePlan.subject");
692        let obs_filtered = extractor.filter_expression_for_resource(patient_expr, "Observation");
693        assert_eq!(obs_filtered, "Observation.subject");
694    }
695
696    #[test]
697    fn test_extract_immunization_patient() {
698        let extractor = create_test_extractor();
699
700        let immunization = json!({
701            "resourceType": "Immunization",
702            "id": "test-imm",
703            "status": "completed",
704            "vaccineCode": {
705                "coding": [{
706                    "system": "http://hl7.org/fhir/sid/cvx",
707                    "code": "140"
708                }]
709            },
710            "patient": {
711                "reference": "Patient/test-patient"
712            },
713            "occurrenceDateTime": "2021-01-01"
714        });
715
716        let values = extractor.extract(&immunization, "Immunization").unwrap();
717
718        // Should have extracted patient reference
719        let patient_values: Vec<_> = values
720            .iter()
721            .filter(|v| v.param_name == "patient")
722            .collect();
723        assert!(
724            !patient_values.is_empty(),
725            "Should extract 'patient' values from Immunization"
726        );
727
728        // Check the reference value
729        if let IndexValue::Reference { reference, .. } = &patient_values[0].value {
730            assert!(
731                reference.contains("Patient/test-patient") || reference.contains("test-patient"),
732                "Should contain patient reference, got: {}",
733                reference
734            );
735        }
736    }
737}