Skip to main content

lance_graph/
config.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Graph configuration for mapping Lance datasets to property graphs
5
6use crate::error::{GraphError, Result};
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10/// Configuration for mapping Lance datasets to property graphs
11///
12/// # Important: Case-Insensitive Keys
13///
14/// **WARNING**: `node_mappings` and `relationship_mappings` store keys as **lowercase** for
15/// case-insensitive behavior. If you directly insert into these maps, you **must** normalize
16/// keys to lowercase to maintain invariants.
17///
18/// **Recommended**: Use `GraphConfigBuilder` instead of direct field access. The builder
19/// automatically normalizes keys.
20///
21/// # Future API Changes
22///
23/// These fields may become private in a future major version to enforce invariants.
24/// Code should migrate to using accessor methods (`get_node_mapping()`, `get_relationship_mapping()`)
25/// rather than direct field access.
26///
27/// # TODO: API Safety
28///
29/// TODO: Make `node_mappings` and `relationship_mappings` private to prevent external code
30/// from bypassing key normalization. This would require:
31/// 1. Adding iterator methods for external access (e.g., `iter_node_mappings()`)
32/// 2. Making this a breaking API change in next major version
33/// 3. Ensuring all internal code uses accessor methods
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct GraphConfig {
36    /// Mapping of node labels to their field configurations
37    ///
38    /// **Keys are stored as lowercase** for case-insensitive lookups.
39    /// Use `get_node_mapping()` for lookups instead of direct access.
40    ///
41    /// TODO: Make this private to enforce key normalization invariants
42    pub node_mappings: HashMap<String, NodeMapping>,
43
44    /// Mapping of relationship types to their field configurations
45    ///
46    /// **Keys are stored as lowercase** for case-insensitive lookups.
47    /// Use `get_relationship_mapping()` for lookups instead of direct access.
48    ///
49    /// TODO: Make this private to enforce key normalization invariants
50    pub relationship_mappings: HashMap<String, RelationshipMapping>,
51
52    /// Default node ID field if not specified in mappings
53    pub default_node_id_field: String,
54
55    /// Default relationship type field if not specified in mappings
56    pub default_relationship_type_field: String,
57}
58
59/// Configuration for mapping node labels to dataset fields
60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct NodeMapping {
62    /// The node label (e.g., "Person", "Product")
63    pub label: String,
64    /// Field name that serves as the node identifier
65    pub id_field: String,
66    /// Optional fields that define node properties
67    pub property_fields: Vec<String>,
68    /// Optional filter conditions for this node type
69    pub filter_conditions: Option<String>,
70}
71
72/// Configuration for mapping relationship types to dataset fields
73#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct RelationshipMapping {
75    /// The relationship type (e.g., "KNOWS", "PURCHASED")
76    pub relationship_type: String,
77    /// Field containing the source node ID
78    pub source_id_field: String,
79    /// Field containing the target node ID
80    pub target_id_field: String,
81    /// Optional field containing the relationship type
82    pub type_field: Option<String>,
83    /// Optional fields that define relationship properties
84    pub property_fields: Vec<String>,
85    /// Optional filter conditions for this relationship type
86    pub filter_conditions: Option<String>,
87}
88
89impl Default for GraphConfig {
90    fn default() -> Self {
91        Self {
92            node_mappings: HashMap::new(),
93            relationship_mappings: HashMap::new(),
94            default_node_id_field: "id".to_string(),
95            default_relationship_type_field: "type".to_string(),
96        }
97    }
98}
99
100impl GraphConfig {
101    /// Create a new builder for GraphConfig
102    pub fn builder() -> GraphConfigBuilder {
103        GraphConfigBuilder::new()
104    }
105
106    /// Get node mapping for a given label (case-insensitive)
107    ///
108    /// Looks up the node mapping using case-insensitive comparison.
109    /// For example, "Person", "PERSON", and "person" all refer to the same label.
110    pub fn get_node_mapping(&self, label: &str) -> Option<&NodeMapping> {
111        self.node_mappings.get(&label.to_lowercase())
112    }
113
114    /// Get relationship mapping for a given type (case-insensitive)
115    ///
116    /// Looks up the relationship mapping using case-insensitive comparison.
117    /// For example, "FOLLOWS", "follows", and "Follows" all refer to the same type.
118    pub fn get_relationship_mapping(&self, rel_type: &str) -> Option<&RelationshipMapping> {
119        self.relationship_mappings.get(&rel_type.to_lowercase())
120    }
121
122    /// Validate the configuration
123    ///
124    /// Checks for:
125    /// - Empty ID fields
126    /// - Non-normalized keys (must be lowercase)
127    /// - Case-insensitive duplicates
128    pub fn validate(&self) -> Result<()> {
129        // Validate node mappings
130        for (label, mapping) in &self.node_mappings {
131            // Check that keys are normalized (lowercase)
132            if label != &label.to_lowercase() {
133                return Err(GraphError::ConfigError {
134                    message: format!(
135                        "Node mapping key '{}' is not normalized. \
136                         Keys must be lowercase. Use GraphConfigBuilder to ensure proper normalization.",
137                        label
138                    ),
139                    location: snafu::Location::new(file!(), line!(), column!()),
140                });
141            }
142
143            if mapping.id_field.is_empty() {
144                return Err(GraphError::ConfigError {
145                    message: format!("Node mapping for '{}' has empty id_field", label),
146                    location: snafu::Location::new(file!(), line!(), column!()),
147                });
148            }
149        }
150
151        // Validate relationship mappings
152        for (rel_type, mapping) in &self.relationship_mappings {
153            // Check that keys are normalized (lowercase)
154            if rel_type != &rel_type.to_lowercase() {
155                return Err(GraphError::ConfigError {
156                    message: format!(
157                        "Relationship mapping key '{}' is not normalized. \
158                         Keys must be lowercase. Use GraphConfigBuilder to ensure proper normalization.",
159                        rel_type
160                    ),
161                    location: snafu::Location::new(file!(), line!(), column!()),
162                });
163            }
164
165            if mapping.source_id_field.is_empty() || mapping.target_id_field.is_empty() {
166                return Err(GraphError::ConfigError {
167                    message: format!(
168                        "Relationship mapping for '{}' has empty source or target id field",
169                        rel_type
170                    ),
171                    location: snafu::Location::new(file!(), line!(), column!()),
172                });
173            }
174        }
175
176        Ok(())
177    }
178}
179
180/// Builder for GraphConfig
181#[derive(Debug, Default, Clone)]
182pub struct GraphConfigBuilder {
183    node_mappings: HashMap<String, NodeMapping>,
184    relationship_mappings: HashMap<String, RelationshipMapping>,
185    default_node_id_field: Option<String>,
186    default_relationship_type_field: Option<String>,
187}
188
189impl GraphConfigBuilder {
190    /// Create a new builder
191    pub fn new() -> Self {
192        Self::default()
193    }
194
195    /// Add a node label mapping
196    ///
197    /// Note: Labels are case-insensitive. If you add "Person" and "person", the second
198    /// will overwrite the first. Keys are stored as lowercase to prevent duplicates.
199    pub fn with_node_label<S: Into<String>>(mut self, label: S, id_field: S) -> Self {
200        let label_str = label.into();
201        let normalized_key = label_str.to_lowercase();
202        self.node_mappings.insert(
203            normalized_key,
204            NodeMapping {
205                label: label_str, // Keep original case for display
206                id_field: id_field.into(),
207                property_fields: Vec::new(),
208                filter_conditions: None,
209            },
210        );
211        self
212    }
213
214    /// Add a node mapping with additional configuration
215    pub fn with_node_mapping(mut self, mapping: NodeMapping) -> Self {
216        let normalized_key = mapping.label.to_lowercase();
217        self.node_mappings.insert(normalized_key, mapping);
218        self
219    }
220
221    /// Add a relationship type mapping
222    pub fn with_relationship<S: Into<String>>(
223        mut self,
224        rel_type: S,
225        source_field: S,
226        target_field: S,
227    ) -> Self {
228        let type_str = rel_type.into();
229        let normalized_key = type_str.to_lowercase();
230        self.relationship_mappings.insert(
231            normalized_key,
232            RelationshipMapping {
233                relationship_type: type_str, // Keep original case for display
234                source_id_field: source_field.into(),
235                target_id_field: target_field.into(),
236                type_field: None,
237                property_fields: Vec::new(),
238                filter_conditions: None,
239            },
240        );
241        self
242    }
243
244    /// Add a relationship mapping with additional configuration
245    pub fn with_relationship_mapping(mut self, mapping: RelationshipMapping) -> Self {
246        let normalized_key = mapping.relationship_type.to_lowercase();
247        self.relationship_mappings.insert(normalized_key, mapping);
248        self
249    }
250
251    /// Set the default node ID field
252    pub fn with_default_node_id_field<S: Into<String>>(mut self, field: S) -> Self {
253        self.default_node_id_field = Some(field.into());
254        self
255    }
256
257    /// Set the default relationship type field
258    pub fn with_default_relationship_type_field<S: Into<String>>(mut self, field: S) -> Self {
259        self.default_relationship_type_field = Some(field.into());
260        self
261    }
262
263    /// Build the GraphConfig
264    pub fn build(self) -> Result<GraphConfig> {
265        let config = GraphConfig {
266            node_mappings: self.node_mappings,
267            relationship_mappings: self.relationship_mappings,
268            default_node_id_field: self
269                .default_node_id_field
270                .unwrap_or_else(|| "id".to_string()),
271            default_relationship_type_field: self
272                .default_relationship_type_field
273                .unwrap_or_else(|| "type".to_string()),
274        };
275
276        config.validate()?;
277        Ok(config)
278    }
279}
280
281impl NodeMapping {
282    /// Create a new node mapping
283    pub fn new<S: Into<String>>(label: S, id_field: S) -> Self {
284        Self {
285            label: label.into(),
286            id_field: id_field.into(),
287            property_fields: Vec::new(),
288            filter_conditions: None,
289        }
290    }
291
292    /// Add property fields to the mapping
293    pub fn with_properties(mut self, fields: Vec<String>) -> Self {
294        self.property_fields = fields;
295        self
296    }
297
298    /// Add filter conditions for this node type
299    pub fn with_filter<S: Into<String>>(mut self, filter: S) -> Self {
300        self.filter_conditions = Some(filter.into());
301        self
302    }
303}
304
305impl RelationshipMapping {
306    /// Create a new relationship mapping
307    pub fn new<S: Into<String>>(rel_type: S, source_field: S, target_field: S) -> Self {
308        Self {
309            relationship_type: rel_type.into(),
310            source_id_field: source_field.into(),
311            target_id_field: target_field.into(),
312            type_field: None,
313            property_fields: Vec::new(),
314            filter_conditions: None,
315        }
316    }
317
318    /// Set the type field for this relationship
319    pub fn with_type_field<S: Into<String>>(mut self, type_field: S) -> Self {
320        self.type_field = Some(type_field.into());
321        self
322    }
323
324    /// Add property fields to the mapping
325    pub fn with_properties(mut self, fields: Vec<String>) -> Self {
326        self.property_fields = fields;
327        self
328    }
329
330    /// Add filter conditions for this relationship type
331    pub fn with_filter<S: Into<String>>(mut self, filter: S) -> Self {
332        self.filter_conditions = Some(filter.into());
333        self
334    }
335}
336
#[cfg(test)]
mod tests {
    use super::*;

    /// The builder stores mappings and the accessors find them again.
    #[test]
    fn test_graph_config_builder() {
        let config = GraphConfig::builder()
            .with_node_label("Person", "person_id")
            .with_node_label("Company", "company_id")
            .with_relationship("WORKS_FOR", "person_id", "company_id")
            .build()
            .unwrap();

        assert_eq!(config.node_mappings.len(), 2);
        assert_eq!(config.relationship_mappings.len(), 1);

        let person_mapping = config.get_node_mapping("Person").unwrap();
        assert_eq!(person_mapping.id_field, "person_id");

        let works_for_mapping = config.get_relationship_mapping("WORKS_FOR").unwrap();
        assert_eq!(works_for_mapping.source_id_field, "person_id");
        assert_eq!(works_for_mapping.target_id_field, "company_id");
    }

    /// An empty `id_field` must be rejected by `validate()`.
    #[test]
    fn test_validation_empty_id_field() {
        let mut config = GraphConfig::default();
        // The key must already be lowercase: a non-normalized key such as "Person"
        // fails the key-normalization check first, and the empty-id_field check
        // would never be exercised.
        config.node_mappings.insert(
            "person".to_string(),
            NodeMapping {
                label: "Person".to_string(),
                id_field: "".to_string(),
                property_fields: Vec::new(),
                filter_conditions: None,
            },
        );

        // Pin the specific failure so the test cannot pass for an unrelated reason
        match config.validate() {
            Err(GraphError::ConfigError { message, .. }) => {
                assert!(
                    message.contains("empty id_field"),
                    "unexpected error message: {}",
                    message
                );
            }
            other => panic!("expected ConfigError for empty id_field, got {:?}", other),
        }
    }

    /// A key inserted directly without lowercasing must be rejected by `validate()`.
    #[test]
    fn test_validation_non_normalized_key() {
        let mut config = GraphConfig::default();
        config.node_mappings.insert(
            "Person".to_string(),
            NodeMapping {
                label: "Person".to_string(),
                id_field: "id".to_string(),
                property_fields: Vec::new(),
                filter_conditions: None,
            },
        );

        match config.validate() {
            Err(GraphError::ConfigError { message, .. }) => {
                assert!(
                    message.contains("is not normalized"),
                    "unexpected error message: {}",
                    message
                );
            }
            other => panic!("expected ConfigError for non-normalized key, got {:?}", other),
        }
    }

    /// `with_properties` and `with_filter` populate the corresponding fields.
    #[test]
    fn test_node_mapping_with_properties() {
        let mapping = NodeMapping::new("Person", "id")
            .with_properties(vec!["name".to_string(), "age".to_string()])
            .with_filter("age > 18".to_string());

        assert_eq!(mapping.property_fields.len(), 2);
        assert!(mapping.filter_conditions.is_some());
    }

    #[test]
    fn test_case_insensitive_node_label_lookup() {
        // Test that node label lookups are case-insensitive
        let config = GraphConfig::builder()
            .with_node_label("Person", "person_id")
            .with_node_label("Company", "company_id")
            .build()
            .unwrap();

        // All case variations should work
        assert!(config.get_node_mapping("Person").is_some());
        assert!(config.get_node_mapping("person").is_some());
        assert!(config.get_node_mapping("PERSON").is_some());
        assert!(config.get_node_mapping("PeRsOn").is_some());

        assert!(config.get_node_mapping("Company").is_some());
        assert!(config.get_node_mapping("company").is_some());
        assert!(config.get_node_mapping("COMPANY").is_some());

        // Non-existent labels should return None
        assert!(config.get_node_mapping("Unknown").is_none());
        assert!(config.get_node_mapping("unknown").is_none());

        // Verify we get the same mapping regardless of case
        let mapping1 = config.get_node_mapping("Person").unwrap();
        let mapping2 = config.get_node_mapping("person").unwrap();
        let mapping3 = config.get_node_mapping("PERSON").unwrap();

        assert_eq!(mapping1.id_field, mapping2.id_field);
        assert_eq!(mapping2.id_field, mapping3.id_field);
        assert_eq!(mapping1.id_field, "person_id");
    }

    #[test]
    fn test_case_insensitive_relationship_type_lookup() {
        // Test that relationship type lookups are case-insensitive
        let config = GraphConfig::builder()
            .with_relationship("FOLLOWS", "src_id", "dst_id")
            .with_relationship("WORKS_FOR", "person_id", "company_id")
            .build()
            .unwrap();

        // All case variations should work
        assert!(config.get_relationship_mapping("FOLLOWS").is_some());
        assert!(config.get_relationship_mapping("follows").is_some());
        assert!(config.get_relationship_mapping("Follows").is_some());

        assert!(config.get_relationship_mapping("WORKS_FOR").is_some());
        assert!(config.get_relationship_mapping("works_for").is_some());
        assert!(config.get_relationship_mapping("Works_For").is_some());

        // Non-existent types should return None
        assert!(config.get_relationship_mapping("UNKNOWN").is_none());
        assert!(config.get_relationship_mapping("unknown").is_none());

        // Verify we get the same mapping regardless of case
        let mapping1 = config.get_relationship_mapping("FOLLOWS").unwrap();
        let mapping2 = config.get_relationship_mapping("follows").unwrap();
        let mapping3 = config.get_relationship_mapping("Follows").unwrap();

        assert_eq!(mapping1.source_id_field, mapping2.source_id_field);
        assert_eq!(mapping2.source_id_field, mapping3.source_id_field);
        assert_eq!(mapping1.source_id_field, "src_id");
    }

    #[test]
    fn test_duplicate_label_different_case_should_overwrite() {
        let builder = GraphConfig::builder()
            .with_node_label("Person", "id")
            .with_node_label("person", "id2"); // Should overwrite first entry

        // Should have only 1 entry (second overwrites first due to lowercase key normalization)
        assert_eq!(builder.node_mappings.len(), 1);

        // The second one should have won (id2)
        let mapping = builder.node_mappings.get("person").unwrap();
        assert_eq!(mapping.id_field, "id2");
    }
}
465}