#![allow(clippy::uninlined_format_args)]
#![allow(clippy::useless_vec)]
#![allow(clippy::enum_variant_names)]
#![allow(clippy::new_without_default)]
#![allow(clippy::needless_borrows_for_generic_args)]
#![allow(clippy::unnecessary_cast)]
#![allow(clippy::never_loop)]
#![allow(clippy::collapsible_if)]
use crate::{Edge, Node, PropertyValue};
use rand::Rng;
/// Produces randomly populated graph datasets (nodes plus edges).
///
/// Every random choice made by the generator methods is drawn from the single
/// thread-local RNG handle stored here.
pub struct DataGenerator {
    /// Source of randomness for all generated property values and edge picks.
    rng: rand::rngs::ThreadRng,
}
impl DataGenerator {
/// Creates a generator backed by the thread-local RNG returned by `rand::thread_rng()`.
pub fn new() -> Self {
    let rng = rand::thread_rng();
    Self { rng }
}
/// Generates a random social network of `User` nodes joined by `FRIENDS_WITH` edges.
///
/// Node ids run from `1` to `num_users`; edge ids are assigned sequentially starting at `1`.
/// Every user gets the properties `name`, `age` (18..65), `active`, `join_date` and `score`.
///
/// For each user an attempt count is drawn uniformly from `0..avg_connections * 2`, and that
/// many random targets are picked. Self-picks are skipped and a pair of users is linked at
/// most once regardless of direction, so the number of edges actually created for a user can
/// be lower than the number of attempts. Each edge carries `since` and `strength` properties.
///
/// Returns `(nodes, edges)`. When `avg_connections` is `0` no edges are generated.
pub fn generate_social_network(
    &mut self,
    num_users: usize,
    avg_connections: usize,
) -> (Vec<Node>, Vec<Edge>) {
    let mut nodes = Vec::with_capacity(num_users);
    let mut edges = Vec::new();
    let mut edge_id_counter = 1u64;
    // Every friendship emitted so far, stored as (smaller id, larger id) so that a pair is
    // recognised in either direction with one hash lookup instead of a scan over `edges`.
    let mut linked_pairs: std::collections::HashSet<(u64, u64)> =
        std::collections::HashSet::new();

    for i in 0..num_users {
        let mut node = Node::new((i + 1) as u64);
        node.labels.push("User".to_string());
        node.properties.insert(
            "name".to_string(),
            PropertyValue::String(format!("User{}", i + 1)),
        );
        node.properties.insert(
            "age".to_string(),
            PropertyValue::Int(self.rng.gen_range(18..65)),
        );
        node.properties.insert(
            "active".to_string(),
            PropertyValue::Bool(self.rng.gen_bool(0.8)),
        );
        // Base value plus up to 365 * 86400; presumably Unix seconds within one year of
        // 2021-01-01T00:00:00Z (1609459200) -- confirm against consumers of `join_date`.
        node.properties.insert(
            "join_date".to_string(),
            PropertyValue::Int(1609459200 + self.rng.gen_range(0..86400 * 365)),
        );
        node.properties.insert(
            "score".to_string(),
            PropertyValue::Float(self.rng.gen_range(0.0..1000.0)),
        );
        nodes.push(node);
    }

    // `gen_range` panics on an empty range, so `0..0` must never be sampled.
    let max_attempts = avg_connections.saturating_mul(2);

    for i in 0..num_users {
        let num_connections = if max_attempts == 0 {
            0
        } else {
            self.rng.gen_range(0..max_attempts)
        };
        let source_id = (i + 1) as u64;
        for _ in 0..num_connections {
            let target = self.rng.gen_range(0..num_users);
            if target == i {
                // No self-friendships.
                continue;
            }
            let target_id = (target + 1) as u64;
            let pair = (source_id.min(target_id), source_id.max(target_id));
            if !linked_pairs.insert(pair) {
                // These two users are already connected (in either direction).
                continue;
            }
            let mut edge = Edge::new(edge_id_counter, source_id, target_id, "FRIENDS_WITH");
            edge_id_counter += 1;
            edge.properties.insert(
                "since".to_string(),
                PropertyValue::Int(1609459200 + self.rng.gen_range(0..86400 * 365)),
            );
            edge.properties.insert(
                "strength".to_string(),
                PropertyValue::Float(self.rng.gen_range(0.1..1.0)),
            );
            edges.push(edge);
        }
    }
    (nodes, edges)
}
/// Generates a product catalog of `Category` and `Product` nodes linked by `BELONGS_TO` edges.
///
/// Categories take node ids `1..=num_categories`; products follow with ids
/// `num_categories + 1 ..= num_categories + num_products`. Edge ids are assigned
/// sequentially starting at `1`.
///
/// Each product gets between one and three `BELONGS_TO` edges pointing at randomly chosen
/// categories. The picks are independent, so the same category can be chosen more than once
/// for one product. Each edge carries a `relevance` property.
///
/// Returns `(nodes, edges)`. When `num_categories` is `0` the products are still generated
/// but no edges are created, because there is nothing to link them to.
pub fn generate_product_catalog(
    &mut self,
    num_products: usize,
    num_categories: usize,
) -> (Vec<Node>, Vec<Edge>) {
    let mut nodes = Vec::with_capacity(num_products + num_categories);
    let mut edges = Vec::new();
    let mut edge_id_counter = 1u64;

    for i in 0..num_categories {
        let mut node = Node::new((i + 1) as u64);
        node.labels.push("Category".to_string());
        node.properties.insert(
            "name".to_string(),
            PropertyValue::String(format!("Category{}", i + 1)),
        );
        node.properties.insert(
            "description".to_string(),
            PropertyValue::String(format!("Description for category {}", i + 1)),
        );
        node.properties.insert(
            "level".to_string(),
            PropertyValue::Int(self.rng.gen_range(1..4)),
        );
        nodes.push(node);
    }

    for i in 0..num_products {
        // Product ids continue right after the category ids.
        let product_id = (num_categories + i + 1) as u64;
        let mut node = Node::new(product_id);
        node.labels.push("Product".to_string());
        node.properties.insert(
            "name".to_string(),
            PropertyValue::String(format!("Product{}", i + 1)),
        );
        node.properties.insert(
            "price".to_string(),
            PropertyValue::Float(self.rng.gen_range(1.0..1000.0)),
        );
        node.properties.insert(
            "stock".to_string(),
            PropertyValue::Int(self.rng.gen_range(0..1000)),
        );
        node.properties.insert(
            "rating".to_string(),
            PropertyValue::Float(self.rng.gen_range(1.0..5.0)),
        );
        node.properties.insert(
            "available".to_string(),
            PropertyValue::Bool(self.rng.gen_bool(0.9)),
        );
        nodes.push(node);

        if num_categories == 0 {
            // `gen_range(1..=0)` would panic on the empty range; there is no category to
            // attach this product to.
            continue;
        }

        let num_categories_for_product = self.rng.gen_range(1..=3);
        for _ in 0..num_categories_for_product {
            let category_id = self.rng.gen_range(1..=num_categories) as u64;
            let mut edge = Edge::new(edge_id_counter, product_id, category_id, "BELONGS_TO");
            edge_id_counter += 1;
            edge.properties.insert(
                "relevance".to_string(),
                PropertyValue::Float(self.rng.gen_range(0.5..1.0)),
            );
            edges.push(edge);
        }
    }
    (nodes, edges)
}
/// Generates a knowledge graph of typed entities connected by typed relationships.
///
/// Entities take node ids `1..=num_entities`; each is labelled with one randomly chosen
/// type out of `Person`, `Organization`, `Location`, `Event`, `Concept` and gets the
/// properties `name`, `description`, `confidence` and `created_at`.
///
/// `num_relationships` is the number of *attempts*: each attempt draws a random source and
/// target, and attempts that draw the same entity twice are discarded, so the returned edge
/// count can be lower. Edge ids are assigned sequentially starting at `1`, and each edge
/// carries `weight` and `verified` properties. Duplicate relationships between the same pair
/// are not filtered out.
///
/// Returns `(nodes, edges)`. With fewer than two entities no edges are generated.
pub fn generate_knowledge_graph(
    &mut self,
    num_entities: usize,
    num_relationships: usize,
) -> (Vec<Node>, Vec<Edge>) {
    let mut nodes = Vec::with_capacity(num_entities);
    let mut edges = Vec::new();
    let mut edge_id_counter = 1u64;
    let entity_types = ["Person", "Organization", "Location", "Event", "Concept"];
    let relationship_types = [
        "WORKS_FOR",
        "LOCATED_IN",
        "PARTICIPATES_IN",
        "RELATED_TO",
        "KNOWS",
    ];

    for i in 0..num_entities {
        let mut node = Node::new((i + 1) as u64);
        let entity_type = entity_types[self.rng.gen_range(0..entity_types.len())];
        node.labels.push(entity_type.to_string());
        node.properties.insert(
            "name".to_string(),
            PropertyValue::String(format!("Entity{}", i + 1)),
        );
        node.properties.insert(
            "description".to_string(),
            PropertyValue::String(format!("Description for entity {}", i + 1)),
        );
        node.properties.insert(
            "confidence".to_string(),
            PropertyValue::Float(self.rng.gen_range(0.5..1.0)),
        );
        // Base value plus up to 730 * 86400; presumably Unix seconds within two years of
        // 2021-01-01T00:00:00Z (1609459200) -- confirm against consumers of `created_at`.
        node.properties.insert(
            "created_at".to_string(),
            PropertyValue::Int(1609459200 + self.rng.gen_range(0..86400 * 730)),
        );
        nodes.push(node);
    }

    // With zero entities `gen_range(1..=0)` would panic on the empty range, and with a
    // single entity every draw is a self-loop that gets discarded anyway.
    if num_entities >= 2 {
        for _ in 0..num_relationships {
            let source_id = self.rng.gen_range(1..=num_entities) as u64;
            let target_id = self.rng.gen_range(1..=num_entities) as u64;
            if source_id == target_id {
                // Self-relationships are not emitted.
                continue;
            }
            let relationship_type =
                relationship_types[self.rng.gen_range(0..relationship_types.len())];
            let mut edge = Edge::new(edge_id_counter, source_id, target_id, relationship_type);
            edge_id_counter += 1;
            edge.properties.insert(
                "weight".to_string(),
                PropertyValue::Float(self.rng.gen_range(0.1..1.0)),
            );
            edge.properties.insert(
                "verified".to_string(),
                PropertyValue::Bool(self.rng.gen_bool(0.7)),
            );
            edges.push(edge);
        }
    }
    (nodes, edges)
}
/// Small preset: a social network built with `num_users = 100` and `avg_connections = 10`.
pub fn generate_small_dataset(&mut self) -> (Vec<Node>, Vec<Edge>) {
    let (num_users, avg_connections) = (100, 10);
    self.generate_social_network(num_users, avg_connections)
}
/// Medium preset: a social network built with `num_users = 1000` and `avg_connections = 25`.
pub fn generate_medium_dataset(&mut self) -> (Vec<Node>, Vec<Edge>) {
    let (num_users, avg_connections) = (1_000, 25);
    self.generate_social_network(num_users, avg_connections)
}
/// Large preset: a social network built with `num_users = 5000` and `avg_connections = 50`.
pub fn generate_large_dataset(&mut self) -> (Vec<Node>, Vec<Edge>) {
    let (num_users, avg_connections) = (5_000, 50);
    self.generate_social_network(num_users, avg_connections)
}
/// Extra-large preset: a social network built with `num_users = 50000` and
/// `avg_connections = 100`.
pub fn generate_xlarge_dataset(&mut self) -> (Vec<Node>, Vec<Edge>) {
    let (num_users, avg_connections) = (50_000, 100);
    self.generate_social_network(num_users, avg_connections)
}
/// XX-large preset: a social network built with `num_users = 100000` and
/// `avg_connections = 100`.
pub fn generate_xxlarge_dataset(&mut self) -> (Vec<Node>, Vec<Edge>) {
    let (num_users, avg_connections) = (100_000, 100);
    self.generate_social_network(num_users, avg_connections)
}
/// XXX-large preset: a social network built with `num_users = 500000` and
/// `avg_connections = 50`.
pub fn generate_xxxlarge_dataset(&mut self) -> (Vec<Node>, Vec<Edge>) {
    let (num_users, avg_connections) = (500_000, 50);
    self.generate_social_network(num_users, avg_connections)
}
}
impl Default for DataGenerator {
fn default() -> Self {
Self::new()
}
}