1use std::collections::{HashMap, HashSet};
7
8use chrono::{DateTime, Utc};
9use rand::prelude::*;
10use rand_chacha::ChaCha8Rng;
11use serde::{Deserialize, Serialize};
12use serde_json::Value;
13use uuid::Uuid;
14
15use super::rules::{
16 CardinalityRule, PropertyGenerator, PropertyValueType, RelationshipConfig,
17 RelationshipTypeConfig, RelationshipValidation,
18};
19
20#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct GeneratedRelationship {
23 pub relationship_type: String,
25 pub id: String,
27 pub source_id: String,
29 pub target_id: String,
31 pub properties: HashMap<String, Value>,
33 pub metadata: RelationshipMetadata,
35}
36
37#[derive(Debug, Clone, Serialize, Deserialize)]
39pub struct RelationshipMetadata {
40 pub source: String,
42 pub generated_at: DateTime<Utc>,
44 pub weight: Option<f64>,
46 pub valid_from: Option<DateTime<Utc>>,
48 pub valid_to: Option<DateTime<Utc>>,
50 pub labels: HashMap<String, String>,
52 pub features: Option<Vec<f64>>,
54 pub is_directed: bool,
56}
57
58impl Default for RelationshipMetadata {
59 fn default() -> Self {
60 Self {
61 source: "datasynth".to_string(),
62 generated_at: Utc::now(),
63 weight: None,
64 valid_from: None,
65 valid_to: None,
66 labels: HashMap::new(),
67 features: None,
68 is_directed: true,
69 }
70 }
71}
72
73#[derive(Debug, Clone)]
75pub struct NodeRef {
76 pub id: String,
78 pub node_type: String,
80 pub properties: HashMap<String, Value>,
82}
83
84impl NodeRef {
85 pub fn new(id: impl Into<String>, node_type: impl Into<String>) -> Self {
87 Self {
88 id: id.into(),
89 node_type: node_type.into(),
90 properties: HashMap::new(),
91 }
92 }
93
94 pub fn with_property(mut self, key: impl Into<String>, value: Value) -> Self {
96 self.properties.insert(key.into(), value);
97 self
98 }
99}
100
101pub struct RelationshipGenerator {
103 config: RelationshipConfig,
105 rng: ChaCha8Rng,
107 count: u64,
109 relationships_by_source: HashMap<String, HashMap<String, Vec<String>>>,
111 relationships_by_target: HashMap<String, HashMap<String, Vec<String>>>,
113 visited: HashSet<String>,
115}
116
117impl RelationshipGenerator {
118 pub fn new(config: RelationshipConfig, seed: u64) -> Self {
120 Self {
121 config,
122 rng: ChaCha8Rng::seed_from_u64(seed),
123 count: 0,
124 relationships_by_source: HashMap::new(),
125 relationships_by_target: HashMap::new(),
126 visited: HashSet::new(),
127 }
128 }
129
130 pub fn with_defaults(seed: u64) -> Self {
132 Self::new(RelationshipConfig::default(), seed)
133 }
134
135 pub fn generate_relationships(&mut self, nodes: &[NodeRef]) -> Vec<GeneratedRelationship> {
137 let mut relationships = Vec::new();
138
139 let nodes_by_type = self.group_nodes_by_type(nodes);
141
142 let relationship_types = self.config.relationship_types.clone();
144
145 for rel_type in &relationship_types {
147 let rels = self.generate_for_type(rel_type, &nodes_by_type);
148 relationships.extend(rels);
149 }
150
151 relationships
152 }
153
154 pub fn generate_for_node(
156 &mut self,
157 node: &NodeRef,
158 available_targets: &HashMap<String, Vec<NodeRef>>,
159 ) -> Vec<GeneratedRelationship> {
160 if self.config.allow_orphans && self.rng.gen_bool(self.config.orphan_probability) {
162 return Vec::new();
163 }
164
165 let mut relationships = Vec::new();
166
167 let applicable_types: Vec<_> = self
169 .config
170 .relationship_types
171 .iter()
172 .filter(|rt| rt.source_type == node.node_type)
173 .cloned()
174 .collect();
175
176 for rel_type in &applicable_types {
177 if let Some(targets) = available_targets.get(&rel_type.target_type) {
178 let rels = self.generate_edges_for_node(node, targets, rel_type);
179 relationships.extend(rels);
180 }
181 }
182
183 relationships
184 }
185
186 pub fn check_cardinality(
188 &self,
189 source_id: &str,
190 target_id: &str,
191 rel_type: &str,
192 ) -> RelationshipValidation {
193 let type_config = self
195 .config
196 .relationship_types
197 .iter()
198 .find(|rt| rt.name == rel_type);
199
200 let Some(type_config) = type_config else {
201 return RelationshipValidation::invalid(format!(
202 "Unknown relationship type: {}",
203 rel_type
204 ));
205 };
206
207 let (_min, max) = type_config.cardinality.bounds();
208
209 let current_count = self
211 .relationships_by_source
212 .get(source_id)
213 .and_then(|m| m.get(rel_type))
214 .map(|v| v.len())
215 .unwrap_or(0);
216
217 if current_count >= max as usize {
218 return RelationshipValidation::invalid(format!(
219 "Source {} already has maximum {} {} relationships",
220 source_id, max, rel_type
221 ));
222 }
223
224 if matches!(
226 type_config.cardinality,
227 CardinalityRule::OneToOne | CardinalityRule::ManyToOne { .. }
228 ) {
229 let target_count = self
230 .relationships_by_target
231 .get(target_id)
232 .and_then(|m| m.get(rel_type))
233 .map(|v| v.len())
234 .unwrap_or(0);
235
236 if target_count > 0 {
237 return RelationshipValidation::invalid(format!(
238 "Target {} already has a {} relationship",
239 target_id, rel_type
240 ));
241 }
242 }
243
244 RelationshipValidation::valid()
245 }
246
247 pub fn check_circular(&mut self, source_id: &str, target_id: &str) -> bool {
249 if !self.config.allow_circular {
250 if source_id == target_id {
252 return true;
253 }
254
255 self.visited.clear();
257 self.visited.insert(source_id.to_string());
258
259 return self.has_path_to(target_id, source_id, 0);
260 }
261
262 false
263 }
264
265 pub fn count(&self) -> u64 {
267 self.count
268 }
269
270 pub fn reset(&mut self, seed: u64) {
272 self.rng = ChaCha8Rng::seed_from_u64(seed);
273 self.count = 0;
274 self.relationships_by_source.clear();
275 self.relationships_by_target.clear();
276 self.visited.clear();
277 }
278
279 pub fn config(&self) -> &RelationshipConfig {
281 &self.config
282 }
283
284 fn group_nodes_by_type(&self, nodes: &[NodeRef]) -> HashMap<String, Vec<NodeRef>> {
286 let mut grouped: HashMap<String, Vec<NodeRef>> = HashMap::new();
287
288 for node in nodes {
289 grouped
290 .entry(node.node_type.clone())
291 .or_default()
292 .push(node.clone());
293 }
294
295 grouped
296 }
297
298 fn generate_for_type(
300 &mut self,
301 rel_type: &RelationshipTypeConfig,
302 nodes_by_type: &HashMap<String, Vec<NodeRef>>,
303 ) -> Vec<GeneratedRelationship> {
304 let mut relationships = Vec::new();
305
306 let Some(source_nodes) = nodes_by_type.get(&rel_type.source_type) else {
307 return relationships;
308 };
309
310 let Some(target_nodes) = nodes_by_type.get(&rel_type.target_type) else {
311 return relationships;
312 };
313
314 for source in source_nodes {
315 let rels = self.generate_edges_for_node(source, target_nodes, rel_type);
316 relationships.extend(rels);
317 }
318
319 relationships
320 }
321
322 fn generate_edges_for_node(
324 &mut self,
325 source: &NodeRef,
326 targets: &[NodeRef],
327 rel_type: &RelationshipTypeConfig,
328 ) -> Vec<GeneratedRelationship> {
329 let mut relationships = Vec::new();
330
331 if targets.is_empty() {
332 return relationships;
333 }
334
335 let (min, max) = rel_type.cardinality.bounds();
337 let count = if min == max {
338 min as usize
339 } else {
340 self.rng.gen_range(min..=max) as usize
341 };
342
343 let available_targets: Vec<_> = targets
345 .iter()
346 .filter(|t| {
347 let validation = self.check_cardinality(&source.id, &t.id, &rel_type.name);
349 if !validation.valid {
350 return false;
351 }
352
353 if self.check_circular(&source.id, &t.id) {
355 return false;
356 }
357
358 true
359 })
360 .collect();
361
362 if available_targets.is_empty() && rel_type.required {
363 return relationships;
365 }
366
367 let selected_count = count.min(available_targets.len());
369 let mut selected_indices: Vec<usize> = (0..available_targets.len()).collect();
370 selected_indices.shuffle(&mut self.rng);
371 selected_indices.truncate(selected_count);
372
373 for idx in selected_indices {
374 let target = available_targets[idx];
375 let relationship = self.create_relationship(source, target, rel_type);
376
377 self.track_relationship(&source.id, &target.id, &rel_type.name);
379
380 relationships.push(relationship);
381 }
382
383 relationships
384 }
385
386 fn create_relationship(
388 &mut self,
389 source: &NodeRef,
390 target: &NodeRef,
391 rel_type: &RelationshipTypeConfig,
392 ) -> GeneratedRelationship {
393 self.count += 1;
394
395 let id = Uuid::new_v4().to_string();
396 let properties = self.generate_properties(source, target, &rel_type.properties);
397
398 let metadata = RelationshipMetadata {
399 source: "datasynth".to_string(),
400 generated_at: Utc::now(),
401 weight: Some(rel_type.weight),
402 valid_from: None,
403 valid_to: None,
404 labels: HashMap::new(),
405 features: None,
406 is_directed: rel_type.directed,
407 };
408
409 GeneratedRelationship {
410 relationship_type: rel_type.name.clone(),
411 id,
412 source_id: source.id.clone(),
413 target_id: target.id.clone(),
414 properties,
415 metadata,
416 }
417 }
418
419 fn generate_properties(
421 &mut self,
422 source: &NodeRef,
423 target: &NodeRef,
424 rules: &[super::rules::PropertyGenerationRule],
425 ) -> HashMap<String, Value> {
426 let mut properties = HashMap::new();
427
428 for rule in rules {
429 let value =
430 self.generate_property_value(source, target, &rule.generator, &rule.value_type);
431 properties.insert(rule.name.clone(), value);
432 }
433
434 properties
435 }
436
437 fn generate_property_value(
439 &mut self,
440 source: &NodeRef,
441 target: &NodeRef,
442 generator: &PropertyGenerator,
443 value_type: &PropertyValueType,
444 ) -> Value {
445 match generator {
446 PropertyGenerator::Constant(value) => value.clone(),
447
448 PropertyGenerator::RandomChoice(choices) => {
449 if choices.is_empty() {
450 Value::Null
451 } else {
452 let idx = self.rng.gen_range(0..choices.len());
453 choices[idx].clone()
454 }
455 }
456
457 PropertyGenerator::Range { min, max } => {
458 let value = self.rng.gen_range(*min..=*max);
459 match value_type {
460 PropertyValueType::Integer => {
461 Value::Number(serde_json::Number::from(value as i64))
462 }
463 _ => Value::Number(
464 serde_json::Number::from_f64(value)
465 .unwrap_or_else(|| serde_json::Number::from(0)),
466 ),
467 }
468 }
469
470 PropertyGenerator::FromSourceProperty(prop_name) => source
471 .properties
472 .get(prop_name)
473 .cloned()
474 .unwrap_or(Value::Null),
475
476 PropertyGenerator::FromTargetProperty(prop_name) => target
477 .properties
478 .get(prop_name)
479 .cloned()
480 .unwrap_or(Value::Null),
481
482 PropertyGenerator::Uuid => Value::String(Uuid::new_v4().to_string()),
483
484 PropertyGenerator::Timestamp => Value::String(Utc::now().to_rfc3339()),
485 }
486 }
487
488 fn track_relationship(&mut self, source_id: &str, target_id: &str, rel_type: &str) {
490 self.relationships_by_source
492 .entry(source_id.to_string())
493 .or_default()
494 .entry(rel_type.to_string())
495 .or_default()
496 .push(target_id.to_string());
497
498 self.relationships_by_target
500 .entry(target_id.to_string())
501 .or_default()
502 .entry(rel_type.to_string())
503 .or_default()
504 .push(source_id.to_string());
505 }
506
507 fn has_path_to(&mut self, current: &str, target: &str, depth: u32) -> bool {
509 if depth >= self.config.max_circular_depth {
510 return false;
511 }
512
513 if current == target {
514 return true;
515 }
516
517 if self.visited.contains(current) {
518 return false;
519 }
520
521 self.visited.insert(current.to_string());
522
523 let next_nodes: Vec<String> = self
525 .relationships_by_source
526 .get(current)
527 .map(|outgoing| outgoing.values().flatten().cloned().collect())
528 .unwrap_or_default();
529
530 for next in next_nodes {
532 if self.has_path_to(&next, target, depth + 1) {
533 return true;
534 }
535 }
536
537 false
538 }
539}
540
541pub struct RelationshipConfigBuilder {
543 config: RelationshipConfig,
544}
545
546impl RelationshipConfigBuilder {
547 pub fn new() -> Self {
549 Self {
550 config: RelationshipConfig::default(),
551 }
552 }
553
554 pub fn add_type(mut self, type_config: RelationshipTypeConfig) -> Self {
556 self.config.relationship_types.push(type_config);
557 self
558 }
559
560 pub fn allow_orphans(mut self, allow: bool) -> Self {
562 self.config.allow_orphans = allow;
563 self
564 }
565
566 pub fn orphan_probability(mut self, prob: f64) -> Self {
568 self.config.orphan_probability = prob.clamp(0.0, 1.0);
569 self
570 }
571
572 pub fn allow_circular(mut self, allow: bool) -> Self {
574 self.config.allow_circular = allow;
575 self
576 }
577
578 pub fn max_circular_depth(mut self, depth: u32) -> Self {
580 self.config.max_circular_depth = depth;
581 self
582 }
583
584 pub fn build(self) -> RelationshipConfig {
586 self.config
587 }
588}
589
590impl Default for RelationshipConfigBuilder {
591 fn default() -> Self {
592 Self::new()
593 }
594}
595
#[cfg(test)]
mod tests {
    use super::super::rules::PropertyGenerationRule;
    use super::*;

    /// Two journal entries, three accounts and one user.
    fn create_test_nodes() -> Vec<NodeRef> {
        vec![
            NodeRef::new("je_1", "journal_entry"),
            NodeRef::new("je_2", "journal_entry"),
            NodeRef::new("acc_1", "account"),
            NodeRef::new("acc_2", "account"),
            NodeRef::new("acc_3", "account"),
            NodeRef::new("user_1", "user"),
        ]
    }

    /// Generated edges use the configured type and connect the right node types.
    #[test]
    fn test_generate_relationships() {
        let debits = RelationshipTypeConfig::new("debits", "journal_entry", "account")
            .with_cardinality(CardinalityRule::one_to_many(1, 2));
        let config = RelationshipConfig::with_types(vec![debits]);

        let mut generator = RelationshipGenerator::new(config, 42);
        let nodes = create_test_nodes();
        let relationships = generator.generate_relationships(&nodes);

        assert!(!relationships.is_empty());
        for rel in relationships.iter() {
            assert_eq!(rel.relationship_type, "debits");
            assert!(rel.source_id.starts_with("je_"));
            assert!(rel.target_id.starts_with("acc_"));
        }
    }

    /// A known type with no tracked edges validates; an unknown type does not.
    #[test]
    fn test_cardinality_validation() {
        let debits = RelationshipTypeConfig::new("debits", "journal_entry", "account")
            .with_cardinality(CardinalityRule::one_to_one());
        let config = RelationshipConfig::with_types(vec![debits]);

        let generator = RelationshipGenerator::new(config, 42);

        let known = generator.check_cardinality("je_1", "acc_1", "debits");
        assert!(known.valid);

        let unknown = generator.check_cardinality("je_1", "acc_1", "unknown");
        assert!(!unknown.valid);
    }

    /// Self-loops are circular; unrelated nodes are not.
    #[test]
    fn test_circular_detection() {
        let config = RelationshipConfig::default()
            .allow_circular(false)
            .max_circular_depth(3);

        let mut generator = RelationshipGenerator::new(config, 42);

        assert!(generator.check_circular("a", "a"));

        assert!(!generator.check_circular("a", "b"));
    }

    /// Configured property rules show up on the generated edge.
    #[test]
    fn test_property_generation() {
        let rel_type = RelationshipTypeConfig::new("test", "source", "target")
            .with_property(PropertyGenerationRule::range("amount", 100.0, 1000.0))
            .with_property(PropertyGenerationRule::constant_string("status", "active"));
        let config = RelationshipConfig::with_types(vec![rel_type]);

        let mut generator = RelationshipGenerator::new(config, 42);
        let nodes = vec![NodeRef::new("s1", "source"), NodeRef::new("t1", "target")];

        let relationships = generator.generate_relationships(&nodes);

        assert!(!relationships.is_empty());
        let rel = &relationships[0];
        assert!(rel.properties.contains_key("amount"));
        assert!(rel.properties.contains_key("status"));
        assert_eq!(
            rel.properties.get("status"),
            Some(&Value::String("active".into()))
        );
    }

    /// With orphan probability 1.0 a node never receives relationships.
    #[test]
    fn test_orphan_generation() {
        let rel_type = RelationshipTypeConfig::new("test", "source", "target")
            .with_cardinality(CardinalityRule::one_to_one());
        let config = RelationshipConfig::with_types(vec![rel_type])
            .allow_orphans(true)
            .orphan_probability(1.0);

        let mut generator = RelationshipGenerator::new(config, 42);

        let source = NodeRef::new("s1", "source");
        let mut available: HashMap<String, Vec<NodeRef>> = HashMap::new();
        available.insert("target".to_string(), vec![NodeRef::new("t1", "target")]);

        let relationships = generator.generate_for_node(&source, &available);
        assert!(relationships.is_empty());
    }

    /// Every builder setter lands in the built configuration.
    #[test]
    fn test_config_builder() {
        let config = RelationshipConfigBuilder::new()
            .add_type(RelationshipTypeConfig::new("test", "a", "b"))
            .allow_orphans(false)
            .orphan_probability(0.1)
            .allow_circular(true)
            .max_circular_depth(5)
            .build();

        assert_eq!(config.relationship_types.len(), 1);
        assert!(!config.allow_orphans);
        assert_eq!(config.orphan_probability, 0.1);
        assert!(config.allow_circular);
        assert_eq!(config.max_circular_depth, 5);
    }

    /// The counter grows with generation and returns to zero on reset.
    #[test]
    fn test_generator_count_and_reset() {
        let rel_type = RelationshipTypeConfig::new("test", "source", "target")
            .with_cardinality(CardinalityRule::one_to_one());
        let config = RelationshipConfig::with_types(vec![rel_type]);

        let mut generator = RelationshipGenerator::new(config, 42);
        assert_eq!(generator.count(), 0);

        let nodes = vec![NodeRef::new("s1", "source"), NodeRef::new("t1", "target")];
        generator.generate_relationships(&nodes);

        assert!(generator.count() > 0);

        generator.reset(42);
        assert_eq!(generator.count(), 0);
    }
}