lake_pulse/
lib.rs

1// Copyright 2025 Adobe. All rights reserved.
2// This file is licensed to you under the Apache License,
3// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
4// or the MIT license (http://opensource.org/licenses/MIT),
5// at your option.
6//
7// Unless required by applicable law or agreed to in writing,
8// this software is distributed on an "AS IS" BASIS, WITHOUT
9// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or
10// implied. See the LICENSE-MIT and LICENSE-APACHE files for the
11// specific language governing permissions and limitations under
12// each license.
13
14//! # Lake Pulse
15//!
16//! A Rust library for analyzing data lake table health across multiple formats and storage providers.
17//!
18//! Lake Pulse provides comprehensive health analysis for data lake tables including Delta Lake,
19//! Apache Iceberg, Apache Hudi, and Lance. It supports multiple cloud storage providers
20//! (AWS S3, Azure Data Lake, GCS) and local filesystems.
21//!
22//! ## Features
23//!
24//! - **Multi-format support**: Delta Lake, Apache Iceberg, Apache Hudi, Lance
25//! - **Storage providers**: AWS S3, Azure Data Lake Storage, Google Cloud Storage, local filesystem
26//! - **Health metrics**: File size distribution, partition analysis, data skew detection
27//! - **Advanced analysis**: Schema evolution, time travel metrics, deletion vectors, compaction opportunities
28//! - **Performance tracking**: Built-in timing metrics with Gantt chart visualization
29//!
30//! ## Quick Start
31//!
32//! ### Local Filesystem Example
33//!
34//! ```rust,no_run
35//! use lake_pulse::{Analyzer, StorageConfig};
36//!
37//! # async fn example() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
38//! // Configure storage for local filesystem
39//! let config = StorageConfig::local()
40//!     .with_option("path", "./examples/data");
41//!
42//! // Create analyzer
43//! let analyzer = Analyzer::builder(config)
44//!     .build()
45//!     .await?;
46//!
47//! // Analyze a table (auto-detects format: Delta, Iceberg, Hudi, or Lance)
48//! let report = analyzer.analyze("delta_dataset").await?;
49//!
50//! // Print the health report
51//! println!("{}", report);
52//! # Ok(())
53//! # }
54//! ```
55//!
56//! ### AWS S3 Example
57//!
58//! ```rust,no_run
59//! use lake_pulse::{Analyzer, StorageConfig};
60//!
61//! # async fn example() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
62//! let config = StorageConfig::aws()
63//!     .with_option("bucket", "my-bucket")
64//!     .with_option("region", "us-east-1")
65//!     .with_option("access_key_id", "ACCESS_KEY")
66//!     .with_option("secret_access_key", "SECRET_KEY");
67//!
68//! let analyzer = Analyzer::builder(config).build().await?;
69//! let report = analyzer.analyze("my/table/path").await?;
70//! println!("{}", report);
71//! # Ok(())
72//! # }
73//! ```
74//!
75//! ### Azure Data Lake Example
76//!
77//! ```rust,no_run
78//! use lake_pulse::{Analyzer, StorageConfig};
79//!
80//! # async fn example() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
81//! let config = StorageConfig::azure()
82//!     .with_option("container", "my-container")
83//!     .with_option("account_name", "my-account")
84//!     .with_option("tenant_id", "TENANT_ID")
85//!     .with_option("client_id", "CLIENT_ID")
86//!     .with_option("client_secret", "CLIENT_SECRET");
87//!
88//! let analyzer = Analyzer::builder(config).build().await?;
89//! let report = analyzer.analyze("my/table/path").await?;
90//! println!("{}", report);
91//! # Ok(())
92//! # }
93//! ```
94//!
95//! For more examples, see the [`examples/`](https://github.com/adobe/lake-pulse/tree/main/examples) directory.
96//!
97//! ## Modules
98//!
99//! - [`analyze`] - Core analysis functionality and table analyzers
100//! - [`storage`] - Storage abstraction layer (cloud providers and local filesystem)
101//! - [`reader`] - Table format readers (Delta, Iceberg, Hudi, Lance)
102//! - [`util`] - Utility functions and helpers
103
104pub mod analyze;
105pub mod reader;
106pub mod storage;
107pub mod util;
108
109// Re-export commonly used types
110pub use analyze::metrics::{HealthMetrics, HealthReport};
111pub use analyze::Analyzer;
112pub use storage::StorageConfig;