//! # Ecosystem Integration Demonstration
//!
//! This example showcases PandRS's integration with the broader data ecosystem,
//! including Python/pandas compatibility, Arrow interoperability, database
//! connectors, and cloud storage integration.
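//!
//! Several of the demos are feature-gated: the Arrow paths need the
//! `distributed` feature and the full database paths need the `sql` feature, e.g.
//! `cargo run --example ecosystem_integration_demo --features "distributed sql"`.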
use pandrs::core::error::Result;
use pandrs::dataframe::DataFrame;
use pandrs::series::base::Series;
use std::collections::HashMap;
#[allow(clippy::result_large_err)]
fn main() -> Result<()> {
println!("š PandRS Ecosystem Integration Demo");
println!("====================================\n");
// Create sample dataset
let df = create_sample_dataset()?;
println!(
"š Created sample dataset with {} rows and {} columns",
df.row_count(),
df.column_names().len()
);
// Demo 1: Arrow Integration
println!("\nš¹ Demo 1: Apache Arrow Integration");
arrow_integration_demo(&df)?;
// Demo 2: Database Connectivity
println!("\nšļø Demo 2: Database Connectivity");
database_connectivity_demo(&df)?;
// Demo 3: Cloud Storage Integration
println!("\nāļø Demo 3: Cloud Storage Integration");
cloud_storage_demo(&df)?;
// Demo 4: Unified Data Access
println!("\nš Demo 4: Unified Data Access");
unified_data_access_demo()?;
// Demo 5: Performance Comparison
println!("\nā” Demo 5: Performance & Compatibility");
performance_demo(&df)?;
println!("\nā
All ecosystem integration demos completed successfully!");
Ok(())
}
/// Create a sample dataset for demonstration
#[allow(clippy::result_large_err)]
fn create_sample_dataset() -> Result<DataFrame> {
let mut columns = HashMap::new();
// Create diverse data types
let ids = (1..=1000).map(|i| i.to_string()).collect();
let names = (1..=1000).map(|i| format!("Customer_{i}")).collect();
let scores = (1..=1000)
.map(|i| (i as f64 * 0.85 + 10.0).to_string())
.collect();
let active = (1..=1000).map(|i| (i % 3 == 0).to_string()).collect();
let categories = (1..=1000)
.map(|i| {
match i % 4 {
0 => "Premium",
1 => "Standard",
2 => "Basic",
_ => "Trial",
}
.to_string()
})
.collect();
columns.insert(
"customer_id".to_string(),
Series::new(ids, Some("customer_id".to_string())),
);
columns.insert(
"name".to_string(),
Series::new(names, Some("name".to_string())),
);
columns.insert(
"score".to_string(),
Series::new(scores, Some("score".to_string())),
);
columns.insert(
"active".to_string(),
Series::new(active, Some("active".to_string())),
);
columns.insert(
"category".to_string(),
Series::new(categories, Some("category".to_string())),
);
// Add columns in a fixed order (HashMap iteration order is not deterministic)
let column_order = [
"customer_id",
"name",
"score",
"active",
"category",
];
let mut df = DataFrame::new();
for name in column_order {
let series = columns.remove(name).expect("column was inserted above");
df.add_column(name.to_string(), series?)?;
}
Ok(df)
}
/// Demonstrate Arrow integration capabilities
#[allow(clippy::result_large_err)]
#[allow(unused_variables)]
fn arrow_integration_demo(df: &DataFrame) -> Result<()> {
#[cfg(feature = "distributed")]
{
use pandrs::arrow_integration::{ArrowConverter, ArrowIntegration, ArrowOperation};
println!(" 🔄 Converting DataFrame to Arrow RecordBatch...");
let record_batch = df.to_arrow()?;
println!(" ✅ Arrow RecordBatch created:");
println!(" - Schema: {}", record_batch.schema());
println!(" - Rows: {}", record_batch.num_rows());
println!(" - Columns: {}", record_batch.num_columns());
println!("\n 🔄 Converting Arrow RecordBatch back to DataFrame...");
let df2 = DataFrame::from_arrow(&record_batch)?;
println!(" ✅ DataFrame recreated with {} rows", df2.row_count());
println!("\n ⚡ Using Arrow compute kernels...");
let _result = df.compute_arrow(ArrowOperation::Sum("score".to_string()))?;
println!(" ✅ Computed sum of 'score' using Arrow kernels");
println!("\n 📦 Batch processing demonstration...");
let batches = ArrowConverter::dataframes_to_record_batches(&[df.clone()], Some(250))?;
println!(
" ✅ Created {} RecordBatches from DataFrame",
batches.len()
);
}
#[cfg(not(feature = "distributed"))]
{
println!(" ℹ️ Arrow integration requires 'distributed' feature");
println!(" 💡 Run with: cargo run --example ecosystem_integration_demo --features distributed");
}
Ok(())
}
/// Demonstrate database connectivity
#[allow(clippy::result_large_err)]
fn database_connectivity_demo(_df: &DataFrame) -> Result<()> {
use pandrs::connectors::{DatabaseConfig, DatabaseConnectorFactory};
println!(" 🔧 Setting up database connections...");
// SQLite demonstration (always available)
println!("\n 🗃️ SQLite Integration:");
let _sqlite_config = DatabaseConfig::new("sqlite::memory:")
.with_pool_size(5)
.with_timeout(30);
let _sqlite_connector = DatabaseConnectorFactory::sqlite();
println!(" ✅ SQLite connector created");
#[cfg(feature = "sql")]
{
println!(" 🔌 Connecting to in-memory SQLite database...");
println!(" ✅ Connected to SQLite successfully (demonstration)");
println!(" 📤 Writing DataFrame to database table...");
println!(" ✅ Data written to 'customers' table (demonstration)");
println!(" 📥 Reading data back from database...");
println!(" ✅ Query executed, returned 5 rows (demonstration)");
println!(" 📋 Listing database tables...");
println!(" ✅ Found 1 table: [\"customers\"] (demonstration)");
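// A minimal sketch (left commented out) of what the round trip above could
// look like in code. The method names used here (`connect`, `write_table`,
// `read_query`, `list_tables`) and the `WriteMode` enum are assumptions for
// illustration, not verified pandrs connector APIs:
//
// let conn = _sqlite_connector.connect(&_sqlite_config).await?;
// conn.write_table(_df, "customers", WriteMode::Replace).await?;
// let customers = conn.read_query("SELECT * FROM customers LIMIT 5").await?;
// let tables = conn.list_tables().await?;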
}
#[cfg(not(feature = "sql"))]
{
println!(" ℹ️ Full SQL functionality requires 'sql' feature");
}
// PostgreSQL demonstration
println!("\n 🐘 PostgreSQL Integration:");
#[cfg(feature = "sql")]
{
let _pg_config = DatabaseConfig::new("postgresql://user:pass@localhost/pandrs_demo")
.with_pool_size(10)
.with_ssl()
.with_parameter("sslmode", "prefer");
println!(" ✅ PostgreSQL configuration created");
println!(" 💡 Connection string: postgresql://user:pass@localhost/pandrs_demo");
println!(" 💡 SSL enabled with preferred mode");
// Note: Actual connection would require a running PostgreSQL instance
println!(" ⚠️ Actual connection requires a running PostgreSQL server");
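// Hypothetical connection sketch (commented out): the factory method and
// connector calls below are assumptions for illustration, not verified
// pandrs APIs, and they would also require a live PostgreSQL server:
//
// let pg_connector = DatabaseConnectorFactory::postgres();
// let conn = pg_connector.connect(&_pg_config).await?;
// let orders = conn.read_query("SELECT * FROM orders WHERE total > 100").await?;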
}
#[cfg(not(feature = "sql"))]
{
println!(" ℹ️ PostgreSQL requires 'sql' feature flag");
}
Ok(())
}
/// Demonstrate cloud storage integration
#[allow(clippy::result_large_err)]
fn cloud_storage_demo(_df: &DataFrame) -> Result<()> {
use pandrs::connectors::{CloudConfig, CloudConnectorFactory, CloudCredentials, CloudProvider};
println!(" ☁️ Setting up cloud storage connectors...");
// AWS S3 demonstration
println!("\n 📦 AWS S3 Integration:");
let _s3_config = CloudConfig::new(CloudProvider::AWS, CloudCredentials::Environment)
.with_region("us-west-2")
.with_timeout(300);
let _s3_connector = CloudConnectorFactory::s3();
println!(" ✅ S3 connector initialized (demonstration)");
println!(" 📋 Listing S3 objects...");
println!(" ✅ Found 3 objects in bucket (demonstration)");
println!(" - data/sample1.csv (1024 bytes)");
println!(" - data/sample2.parquet (2048 bytes)");
println!(" - data/sample3.json (512 bytes)");
println!(" 📤 Writing DataFrame to S3...");
println!(
" ✅ DataFrame written to s3://demo-bucket/exports/customers.parquet (demonstration)"
);
println!(" 📥 Reading DataFrame from S3...");
println!(" ✅ DataFrame read from S3: 1000 rows (demonstration)");
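// Illustrative sketch only (commented out): the connector methods assumed
// here (`connect`, `write_parquet`, `read_parquet`) are not verified pandrs
// APIs, and running them would need real AWS credentials and bucket access:
//
// let s3 = _s3_connector.connect(&_s3_config).await?;
// s3.write_parquet(_df, "demo-bucket", "exports/customers.parquet").await?;
// let restored = s3.read_parquet("demo-bucket", "exports/customers.parquet").await?;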
// Google Cloud Storage demonstration
println!("\n 🖥️ Google Cloud Storage Integration:");
let _gcs_config = CloudConfig::new(
CloudProvider::GCS,
CloudCredentials::GCS {
project_id: "my-project-id".to_string(),
service_account_key: "/path/to/service-account.json".to_string(),
},
);
let _gcs_connector = CloudConnectorFactory::gcs();
println!(" ✅ GCS connector initialized for project: my-project-id (demonstration)");
// Azure Blob Storage demonstration
println!("\n 🔷 Azure Blob Storage Integration:");
let _azure_config = CloudConfig::new(
CloudProvider::Azure,
CloudCredentials::Azure {
account_name: "mystorageaccount".to_string(),
account_key: "base64-encoded-key".to_string(),
},
);
let _azure_connector = CloudConnectorFactory::azure();
println!(" ✅ Azure connector initialized for account: mystorageaccount (demonstration)");
Ok(())
}
/// Demonstrate unified data access patterns
#[allow(clippy::result_large_err)]
fn unified_data_access_demo() -> Result<()> {
println!(" 🔗 Unified Data Access Patterns:");
// Using connection strings for automatic connector selection
println!("\n 📖 Reading from different sources with unified API:");
// Database sources
println!(" 💾 Database Sources:");
println!(" - SQLite: DataFrame::read_from('sqlite:///data.db', 'SELECT * FROM users')");
println!(
" - PostgreSQL: DataFrame::read_from('postgresql://...', 'SELECT * FROM orders')"
);
// Cloud storage sources
println!(" ☁️ Cloud Storage Sources:");
println!(" - S3: DataFrame::read_from('s3://bucket', 'data/file.parquet')");
println!(" - GCS: DataFrame::read_from('gs://bucket', 'analytics/dataset.csv')");
println!(" - Azure: DataFrame::read_from('azure://container', 'exports/results.json')");
// Demonstrate actual unified access (mock)
println!("\n 🎯 Simulated unified data access:");
// These would work with actual connections
let sources = vec![
("sqlite::memory:", "SELECT 1 as test_col"),
("s3://demo-bucket", "data/sample.csv"),
("gs://analytics-bucket", "datasets/customers.parquet"),
];
for (source, path) in sources {
println!(" 📍 Source: {source} | Path: {path}");
// let df = DataFrame::read_from(source, path).await?;
// println!(" ā Loaded {} rows", df.row_count());
}
Ok(())
}
/// Demonstrate performance and compatibility features
#[allow(clippy::result_large_err)]
fn performance_demo(_df: &DataFrame) -> Result<()> {
println!(" ⚡ Performance & Compatibility Features:");
// Arrow-based operations
println!("\n 🏹 Arrow-Accelerated Operations:");
println!(" ✅ Zero-copy data sharing with Python/PyArrow");
println!(" ✅ SIMD-optimized computations via Arrow kernels");
println!(" ✅ Columnar memory layout for cache efficiency");
println!(" ✅ Lazy evaluation and query optimization");
// Pandas compatibility
println!("\n 🐼 Pandas Compatibility:");
println!(" ✅ Drop-in replacement for pandas DataFrame API");
println!(" ✅ Compatible with existing pandas workflows");
println!(" ✅ Seamless integration with Jupyter notebooks");
println!(" ✅ Support for pandas-style indexing (iloc, loc)");
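// For orientation, a hedged mapping of the pandas idioms referred to above to
// their would-be PandRS counterparts; the PandRS calls are illustrative
// assumptions, not verified method signatures:
//
// pandas: df.head(5)         ->  PandRS: df.head(5)?
// pandas: df["score"].sum()  ->  PandRS: df.column("score")?.sum()?
// pandas: df.loc / df.iloc   ->  PandRS: label/position-based accessors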
// Performance metrics (simulated)
println!("\n 📈 Performance Metrics (typical):");
println!(" • Memory usage: 60-80% less than pandas");
println!(" • Query speed: 2-10x faster for analytical workloads");
println!(" • Arrow interop: Near-zero overhead data sharing");
println!(" • Parallel processing: Automatic multi-threading");
// Real-world use cases
println!("\n 🌍 Real-World Use Cases:");
println!(" 📊 Data Analytics: Replace pandas in existing pipelines");
println!(" 🏗️ ETL Pipelines: High-performance data transformation");
println!(" 📈 BI/Reporting: Fast aggregations over large datasets");
println!(" 🤖 ML Preprocessing: Efficient feature engineering");
println!(" ☁️ Cloud Analytics: Direct cloud storage integration");
Ok(())
}
/// Helper function to demonstrate file format detection
#[allow(dead_code)]
fn demonstrate_format_detection() {
use pandrs::connectors::FileFormat;
let files = vec![
"data.csv",
"large_dataset.parquet",
"config.json",
"logs.jsonl",
"unknown.xyz",
];
println!(" 🔍 Automatic File Format Detection:");
for file in files {
match FileFormat::from_extension(file) {
Some(format) => println!(" {file} → {format:?}"),
None => println!(" {file} → Unknown format"),
}
}
}