Skip to main content

datasynth_generators/relationships/
generator.rs

1//! Relationship generator implementation.
2//!
3//! Provides generation of relationships between entities based on
4//! cardinality rules and property generation configurations.
5
6use std::collections::{HashMap, HashSet};
7
8use chrono::{DateTime, Utc};
9use rand::prelude::*;
10use rand_chacha::ChaCha8Rng;
11use serde::{Deserialize, Serialize};
12use serde_json::Value;
13use uuid::Uuid;
14
15use super::rules::{
16    CardinalityRule, PropertyGenerator, PropertyValueType, RelationshipConfig,
17    RelationshipTypeConfig, RelationshipValidation,
18};
19
20/// Generated relationship output.
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct GeneratedRelationship {
23    /// Relationship type name.
24    pub relationship_type: String,
25    /// Unique relationship ID.
26    pub id: String,
27    /// Source entity ID.
28    pub source_id: String,
29    /// Target entity ID.
30    pub target_id: String,
31    /// Relationship properties.
32    pub properties: HashMap<String, Value>,
33    /// Relationship metadata.
34    pub metadata: RelationshipMetadata,
35}
36
37/// Metadata for a generated relationship.
38#[derive(Debug, Clone, Serialize, Deserialize)]
39pub struct RelationshipMetadata {
40    /// Data source.
41    pub source: String,
42    /// Generation timestamp.
43    pub generated_at: DateTime<Utc>,
44    /// Relationship weight.
45    pub weight: Option<f64>,
46    /// Valid from timestamp.
47    pub valid_from: Option<DateTime<Utc>>,
48    /// Valid to timestamp.
49    pub valid_to: Option<DateTime<Utc>>,
50    /// Custom labels.
51    pub labels: HashMap<String, String>,
52    /// Feature vector for ML.
53    pub features: Option<Vec<f64>>,
54    /// Whether the relationship is directed.
55    pub is_directed: bool,
56}
57
58impl Default for RelationshipMetadata {
59    fn default() -> Self {
60        Self {
61            source: "datasynth".to_string(),
62            generated_at: Utc::now(),
63            weight: None,
64            valid_from: None,
65            valid_to: None,
66            labels: HashMap::new(),
67            features: None,
68            is_directed: true,
69        }
70    }
71}
72
73/// Simple node representation for relationship generation.
74#[derive(Debug, Clone)]
75pub struct NodeRef {
76    /// Node ID.
77    pub id: String,
78    /// Node type.
79    pub node_type: String,
80    /// Node properties.
81    pub properties: HashMap<String, Value>,
82}
83
84impl NodeRef {
85    /// Creates a new node reference.
86    pub fn new(id: impl Into<String>, node_type: impl Into<String>) -> Self {
87        Self {
88            id: id.into(),
89            node_type: node_type.into(),
90            properties: HashMap::new(),
91        }
92    }
93
94    /// Adds a property.
95    pub fn with_property(mut self, key: impl Into<String>, value: Value) -> Self {
96        self.properties.insert(key.into(), value);
97        self
98    }
99}
100
101/// Generator for relationships between entities.
102pub struct RelationshipGenerator {
103    /// Configuration.
104    config: RelationshipConfig,
105    /// Random number generator.
106    rng: ChaCha8Rng,
107    /// Generation count.
108    count: u64,
109    /// Track relationships by source ID for cardinality validation.
110    relationships_by_source: HashMap<String, HashMap<String, Vec<String>>>,
111    /// Track relationships by target ID for cardinality validation.
112    relationships_by_target: HashMap<String, HashMap<String, Vec<String>>>,
113    /// Visited nodes for circular detection.
114    visited: HashSet<String>,
115}
116
117impl RelationshipGenerator {
118    /// Creates a new relationship generator.
119    pub fn new(config: RelationshipConfig, seed: u64) -> Self {
120        Self {
121            config,
122            rng: ChaCha8Rng::seed_from_u64(seed),
123            count: 0,
124            relationships_by_source: HashMap::new(),
125            relationships_by_target: HashMap::new(),
126            visited: HashSet::new(),
127        }
128    }
129
130    /// Creates a generator with default configuration.
131    pub fn with_defaults(seed: u64) -> Self {
132        Self::new(RelationshipConfig::default(), seed)
133    }
134
135    /// Generates relationships for a set of nodes.
136    pub fn generate_relationships(&mut self, nodes: &[NodeRef]) -> Vec<GeneratedRelationship> {
137        let mut relationships = Vec::new();
138
139        // Group nodes by type
140        let nodes_by_type = self.group_nodes_by_type(nodes);
141
142        // Clone relationship types to avoid borrow issues
143        let relationship_types = self.config.relationship_types.clone();
144
145        // For each relationship type, generate relationships
146        for rel_type in &relationship_types {
147            let rels = self.generate_for_type(rel_type, &nodes_by_type);
148            relationships.extend(rels);
149        }
150
151        relationships
152    }
153
154    /// Generates relationships for a single node.
155    pub fn generate_for_node(
156        &mut self,
157        node: &NodeRef,
158        available_targets: &HashMap<String, Vec<NodeRef>>,
159    ) -> Vec<GeneratedRelationship> {
160        // Check for orphan generation
161        if self.config.allow_orphans && self.rng.gen_bool(self.config.orphan_probability) {
162            return Vec::new();
163        }
164
165        let mut relationships = Vec::new();
166
167        // Clone applicable relationship types to avoid borrow issues
168        let applicable_types: Vec<_> = self
169            .config
170            .relationship_types
171            .iter()
172            .filter(|rt| rt.source_type == node.node_type)
173            .cloned()
174            .collect();
175
176        for rel_type in &applicable_types {
177            if let Some(targets) = available_targets.get(&rel_type.target_type) {
178                let rels = self.generate_edges_for_node(node, targets, rel_type);
179                relationships.extend(rels);
180            }
181        }
182
183        relationships
184    }
185
186    /// Checks if a relationship would create a valid cardinality.
187    pub fn check_cardinality(
188        &self,
189        source_id: &str,
190        target_id: &str,
191        rel_type: &str,
192    ) -> RelationshipValidation {
193        // Find the relationship type config
194        let type_config = self
195            .config
196            .relationship_types
197            .iter()
198            .find(|rt| rt.name == rel_type);
199
200        let Some(type_config) = type_config else {
201            return RelationshipValidation::invalid(format!(
202                "Unknown relationship type: {}",
203                rel_type
204            ));
205        };
206
207        let (_min, max) = type_config.cardinality.bounds();
208
209        // Check source-side cardinality
210        let current_count = self
211            .relationships_by_source
212            .get(source_id)
213            .and_then(|m| m.get(rel_type))
214            .map(|v| v.len())
215            .unwrap_or(0);
216
217        if current_count >= max as usize {
218            return RelationshipValidation::invalid(format!(
219                "Source {} already has maximum {} {} relationships",
220                source_id, max, rel_type
221            ));
222        }
223
224        // For OneToOne and ManyToOne, check if target already has a relationship
225        if matches!(
226            type_config.cardinality,
227            CardinalityRule::OneToOne | CardinalityRule::ManyToOne { .. }
228        ) {
229            let target_count = self
230                .relationships_by_target
231                .get(target_id)
232                .and_then(|m| m.get(rel_type))
233                .map(|v| v.len())
234                .unwrap_or(0);
235
236            if target_count > 0 {
237                return RelationshipValidation::invalid(format!(
238                    "Target {} already has a {} relationship",
239                    target_id, rel_type
240                ));
241            }
242        }
243
244        RelationshipValidation::valid()
245    }
246
247    /// Checks if a relationship would create a circular reference.
248    pub fn check_circular(&mut self, source_id: &str, target_id: &str) -> bool {
249        if !self.config.allow_circular {
250            // Simple check: direct circular reference
251            if source_id == target_id {
252                return true;
253            }
254
255            // DFS to check for circular paths
256            self.visited.clear();
257            self.visited.insert(source_id.to_string());
258
259            return self.has_path_to(target_id, source_id, 0);
260        }
261
262        false
263    }
264
265    /// Returns the number of relationships generated.
266    pub fn count(&self) -> u64 {
267        self.count
268    }
269
270    /// Resets the generator.
271    pub fn reset(&mut self, seed: u64) {
272        self.rng = ChaCha8Rng::seed_from_u64(seed);
273        self.count = 0;
274        self.relationships_by_source.clear();
275        self.relationships_by_target.clear();
276        self.visited.clear();
277    }
278
279    /// Returns the configuration.
280    pub fn config(&self) -> &RelationshipConfig {
281        &self.config
282    }
283
284    /// Groups nodes by their type.
285    fn group_nodes_by_type(&self, nodes: &[NodeRef]) -> HashMap<String, Vec<NodeRef>> {
286        let mut grouped: HashMap<String, Vec<NodeRef>> = HashMap::new();
287
288        for node in nodes {
289            grouped
290                .entry(node.node_type.clone())
291                .or_default()
292                .push(node.clone());
293        }
294
295        grouped
296    }
297
298    /// Generates relationships for a specific relationship type.
299    fn generate_for_type(
300        &mut self,
301        rel_type: &RelationshipTypeConfig,
302        nodes_by_type: &HashMap<String, Vec<NodeRef>>,
303    ) -> Vec<GeneratedRelationship> {
304        let mut relationships = Vec::new();
305
306        let Some(source_nodes) = nodes_by_type.get(&rel_type.source_type) else {
307            return relationships;
308        };
309
310        let Some(target_nodes) = nodes_by_type.get(&rel_type.target_type) else {
311            return relationships;
312        };
313
314        for source in source_nodes {
315            let rels = self.generate_edges_for_node(source, target_nodes, rel_type);
316            relationships.extend(rels);
317        }
318
319        relationships
320    }
321
322    /// Generates edges from a single source node.
323    fn generate_edges_for_node(
324        &mut self,
325        source: &NodeRef,
326        targets: &[NodeRef],
327        rel_type: &RelationshipTypeConfig,
328    ) -> Vec<GeneratedRelationship> {
329        let mut relationships = Vec::new();
330
331        if targets.is_empty() {
332            return relationships;
333        }
334
335        // Determine number of relationships based on cardinality
336        let (min, max) = rel_type.cardinality.bounds();
337        let count = if min == max {
338            min as usize
339        } else {
340            self.rng.gen_range(min..=max) as usize
341        };
342
343        // Filter available targets
344        let available_targets: Vec<_> = targets
345            .iter()
346            .filter(|t| {
347                // Check if this relationship is valid
348                let validation = self.check_cardinality(&source.id, &t.id, &rel_type.name);
349                if !validation.valid {
350                    return false;
351                }
352
353                // Check for circular references
354                if self.check_circular(&source.id, &t.id) {
355                    return false;
356                }
357
358                true
359            })
360            .collect();
361
362        if available_targets.is_empty() && rel_type.required {
363            // Log warning or handle required relationship with no valid targets
364            return relationships;
365        }
366
367        // Select targets
368        let selected_count = count.min(available_targets.len());
369        let mut selected_indices: Vec<usize> = (0..available_targets.len()).collect();
370        selected_indices.shuffle(&mut self.rng);
371        selected_indices.truncate(selected_count);
372
373        for idx in selected_indices {
374            let target = available_targets[idx];
375            let relationship = self.create_relationship(source, target, rel_type);
376
377            // Track the relationship for cardinality validation
378            self.track_relationship(&source.id, &target.id, &rel_type.name);
379
380            relationships.push(relationship);
381        }
382
383        relationships
384    }
385
386    /// Creates a single relationship.
387    fn create_relationship(
388        &mut self,
389        source: &NodeRef,
390        target: &NodeRef,
391        rel_type: &RelationshipTypeConfig,
392    ) -> GeneratedRelationship {
393        self.count += 1;
394
395        let id = Uuid::new_v4().to_string();
396        let properties = self.generate_properties(source, target, &rel_type.properties);
397
398        let metadata = RelationshipMetadata {
399            source: "datasynth".to_string(),
400            generated_at: Utc::now(),
401            weight: Some(rel_type.weight),
402            valid_from: None,
403            valid_to: None,
404            labels: HashMap::new(),
405            features: None,
406            is_directed: rel_type.directed,
407        };
408
409        GeneratedRelationship {
410            relationship_type: rel_type.name.clone(),
411            id,
412            source_id: source.id.clone(),
413            target_id: target.id.clone(),
414            properties,
415            metadata,
416        }
417    }
418
419    /// Generates properties for a relationship.
420    fn generate_properties(
421        &mut self,
422        source: &NodeRef,
423        target: &NodeRef,
424        rules: &[super::rules::PropertyGenerationRule],
425    ) -> HashMap<String, Value> {
426        let mut properties = HashMap::new();
427
428        for rule in rules {
429            let value =
430                self.generate_property_value(source, target, &rule.generator, &rule.value_type);
431            properties.insert(rule.name.clone(), value);
432        }
433
434        properties
435    }
436
437    /// Generates a single property value.
438    fn generate_property_value(
439        &mut self,
440        source: &NodeRef,
441        target: &NodeRef,
442        generator: &PropertyGenerator,
443        value_type: &PropertyValueType,
444    ) -> Value {
445        match generator {
446            PropertyGenerator::Constant(value) => value.clone(),
447
448            PropertyGenerator::RandomChoice(choices) => {
449                if choices.is_empty() {
450                    Value::Null
451                } else {
452                    let idx = self.rng.gen_range(0..choices.len());
453                    choices[idx].clone()
454                }
455            }
456
457            PropertyGenerator::Range { min, max } => {
458                let value = self.rng.gen_range(*min..=*max);
459                match value_type {
460                    PropertyValueType::Integer => {
461                        Value::Number(serde_json::Number::from(value as i64))
462                    }
463                    _ => Value::Number(
464                        serde_json::Number::from_f64(value)
465                            .unwrap_or_else(|| serde_json::Number::from(0)),
466                    ),
467                }
468            }
469
470            PropertyGenerator::FromSourceProperty(prop_name) => source
471                .properties
472                .get(prop_name)
473                .cloned()
474                .unwrap_or(Value::Null),
475
476            PropertyGenerator::FromTargetProperty(prop_name) => target
477                .properties
478                .get(prop_name)
479                .cloned()
480                .unwrap_or(Value::Null),
481
482            PropertyGenerator::Uuid => Value::String(Uuid::new_v4().to_string()),
483
484            PropertyGenerator::Timestamp => Value::String(Utc::now().to_rfc3339()),
485        }
486    }
487
488    /// Tracks a relationship for cardinality validation.
489    fn track_relationship(&mut self, source_id: &str, target_id: &str, rel_type: &str) {
490        // Track by source
491        self.relationships_by_source
492            .entry(source_id.to_string())
493            .or_default()
494            .entry(rel_type.to_string())
495            .or_default()
496            .push(target_id.to_string());
497
498        // Track by target
499        self.relationships_by_target
500            .entry(target_id.to_string())
501            .or_default()
502            .entry(rel_type.to_string())
503            .or_default()
504            .push(source_id.to_string());
505    }
506
507    /// DFS to check if there's a path from current to target.
508    fn has_path_to(&mut self, current: &str, target: &str, depth: u32) -> bool {
509        if depth >= self.config.max_circular_depth {
510            return false;
511        }
512
513        if current == target {
514            return true;
515        }
516
517        if self.visited.contains(current) {
518            return false;
519        }
520
521        self.visited.insert(current.to_string());
522
523        // Collect all next nodes to avoid holding borrow during recursion
524        let next_nodes: Vec<String> = self
525            .relationships_by_source
526            .get(current)
527            .map(|outgoing| outgoing.values().flatten().cloned().collect())
528            .unwrap_or_default();
529
530        // Now check paths without holding the borrow
531        for next in next_nodes {
532            if self.has_path_to(&next, target, depth + 1) {
533                return true;
534            }
535        }
536
537        false
538    }
539}
540
541/// Builder for relationship configuration.
542pub struct RelationshipConfigBuilder {
543    config: RelationshipConfig,
544}
545
546impl RelationshipConfigBuilder {
547    /// Creates a new builder.
548    pub fn new() -> Self {
549        Self {
550            config: RelationshipConfig::default(),
551        }
552    }
553
554    /// Adds a relationship type.
555    pub fn add_type(mut self, type_config: RelationshipTypeConfig) -> Self {
556        self.config.relationship_types.push(type_config);
557        self
558    }
559
560    /// Sets whether orphans are allowed.
561    pub fn allow_orphans(mut self, allow: bool) -> Self {
562        self.config.allow_orphans = allow;
563        self
564    }
565
566    /// Sets the orphan probability.
567    pub fn orphan_probability(mut self, prob: f64) -> Self {
568        self.config.orphan_probability = prob.clamp(0.0, 1.0);
569        self
570    }
571
572    /// Sets whether circular relationships are allowed.
573    pub fn allow_circular(mut self, allow: bool) -> Self {
574        self.config.allow_circular = allow;
575        self
576    }
577
578    /// Sets the maximum circular depth.
579    pub fn max_circular_depth(mut self, depth: u32) -> Self {
580        self.config.max_circular_depth = depth;
581        self
582    }
583
584    /// Builds the configuration.
585    pub fn build(self) -> RelationshipConfig {
586        self.config
587    }
588}
589
590impl Default for RelationshipConfigBuilder {
591    fn default() -> Self {
592        Self::new()
593    }
594}
595
#[cfg(test)]
mod tests {
    use super::super::rules::PropertyGenerationRule;
    use super::*;

    /// Seed shared by all tests.
    const SEED: u64 = 42;

    /// Two journal entries, three accounts and one user.
    fn create_test_nodes() -> Vec<NodeRef> {
        [
            ("je_1", "journal_entry"),
            ("je_2", "journal_entry"),
            ("acc_1", "account"),
            ("acc_2", "account"),
            ("acc_3", "account"),
            ("user_1", "user"),
        ]
        .into_iter()
        .map(|(id, node_type)| NodeRef::new(id, node_type))
        .collect()
    }

    /// Configuration that contains exactly one relationship type.
    fn single_type_config(type_config: RelationshipTypeConfig) -> RelationshipConfig {
        RelationshipConfig::with_types(vec![type_config])
    }

    /// One `source` node and one `target` node.
    fn source_target_pair() -> Vec<NodeRef> {
        vec![NodeRef::new("s1", "source"), NodeRef::new("t1", "target")]
    }

    #[test]
    fn test_generate_relationships() {
        let debits = RelationshipTypeConfig::new("debits", "journal_entry", "account")
            .with_cardinality(CardinalityRule::one_to_many(1, 2));
        let mut generator = RelationshipGenerator::new(single_type_config(debits), SEED);

        let nodes = create_test_nodes();
        let relationships = generator.generate_relationships(&nodes);

        assert!(!relationships.is_empty());
        for rel in relationships.iter() {
            assert_eq!(rel.relationship_type, "debits");
            assert!(rel.source_id.starts_with("je_"));
            assert!(rel.target_id.starts_with("acc_"));
        }
    }

    #[test]
    fn test_cardinality_validation() {
        let debits = RelationshipTypeConfig::new("debits", "journal_entry", "account")
            .with_cardinality(CardinalityRule::one_to_one());
        let generator = RelationshipGenerator::new(single_type_config(debits), SEED);

        // A known type with nothing tracked yet is acceptable.
        let known = generator.check_cardinality("je_1", "acc_1", "debits");
        assert!(known.valid);

        // An unconfigured type is rejected.
        let unknown = generator.check_cardinality("je_1", "acc_1", "unknown");
        assert!(!unknown.valid);
    }

    #[test]
    fn test_circular_detection() {
        let config = RelationshipConfig::default()
            .allow_circular(false)
            .max_circular_depth(3);
        let mut generator = RelationshipGenerator::new(config, SEED);

        // A self-reference is the direct circular case.
        assert!(generator.check_circular("a", "a"));

        // Distinct nodes with nothing tracked cannot form a cycle.
        assert!(!generator.check_circular("a", "b"));
    }

    #[test]
    fn test_property_generation() {
        let type_config = RelationshipTypeConfig::new("test", "source", "target")
            .with_property(PropertyGenerationRule::range("amount", 100.0, 1000.0))
            .with_property(PropertyGenerationRule::constant_string("status", "active"));
        let mut generator = RelationshipGenerator::new(single_type_config(type_config), SEED);

        let nodes = source_target_pair();
        let relationships = generator.generate_relationships(&nodes);

        assert!(!relationships.is_empty());
        let first = &relationships[0];
        assert!(first.properties.contains_key("amount"));
        assert!(first.properties.contains_key("status"));
        assert_eq!(
            first.properties.get("status"),
            Some(&Value::String("active".to_string()))
        );
    }

    #[test]
    fn test_orphan_generation() {
        let type_config = RelationshipTypeConfig::new("test", "source", "target")
            .with_cardinality(CardinalityRule::one_to_one());
        // A probability of 1.0 always produces an orphan.
        let config = single_type_config(type_config)
            .allow_orphans(true)
            .orphan_probability(1.0);
        let mut generator = RelationshipGenerator::new(config, SEED);

        let source = NodeRef::new("s1", "source");
        let mut available: HashMap<String, Vec<NodeRef>> = HashMap::new();
        available.insert("target".to_string(), vec![NodeRef::new("t1", "target")]);

        let relationships = generator.generate_for_node(&source, &available);
        assert!(relationships.is_empty());
    }

    #[test]
    fn test_config_builder() {
        let builder = RelationshipConfigBuilder::new()
            .add_type(RelationshipTypeConfig::new("test", "a", "b"))
            .allow_orphans(false)
            .orphan_probability(0.1)
            .allow_circular(true)
            .max_circular_depth(5);
        let config = builder.build();

        assert_eq!(config.relationship_types.len(), 1);
        assert!(!config.allow_orphans);
        assert_eq!(config.orphan_probability, 0.1);
        assert!(config.allow_circular);
        assert_eq!(config.max_circular_depth, 5);
    }

    #[test]
    fn test_generator_count_and_reset() {
        let type_config = RelationshipTypeConfig::new("test", "source", "target")
            .with_cardinality(CardinalityRule::one_to_one());
        let mut generator = RelationshipGenerator::new(single_type_config(type_config), SEED);
        assert_eq!(generator.count(), 0);

        let nodes = source_target_pair();
        generator.generate_relationships(&nodes);
        assert!(generator.count() > 0);

        generator.reset(SEED);
        assert_eq!(generator.count(), 0);
    }
}