1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
use super::view::ViewMetadata;
use crate::time::TimeRange;
use chrono::{DateTime, Utc};
/// Partition metadata (without embedded file_metadata for performance)
/// Use load_partition_metadata() to load metadata on-demand when needed
#[derive(Clone, Debug)]
pub struct Partition {
/// Metadata about the view this partition belongs to.
pub view_metadata: ViewMetadata,
/// The insert time range for this partition.
pub insert_time_range: TimeRange,
/// The event time range for this partition. None for empty partitions.
pub event_time_range: Option<TimeRange>,
/// The last time this partition was updated.
pub updated: DateTime<Utc>,
/// The path to the Parquet file for this partition. None for empty partitions.
pub file_path: Option<String>,
/// The size of the Parquet file in bytes. 0 for empty partitions.
pub file_size: i64,
/// A hash of the source data that generated this partition.
pub source_data_hash: Vec<u8>,
/// The number of rows in this partition. 0 for empty partitions.
pub num_rows: i64,
}
impl Partition {
/// Returns true if this partition has no data (num_rows = 0).
pub fn is_empty(&self) -> bool {
self.num_rows == 0
}
/// Returns the min event time, if this partition has data.
pub fn min_event_time(&self) -> Option<DateTime<Utc>> {
self.event_time_range.as_ref().map(|r| r.begin)
}
/// Returns the max event time, if this partition has data.
pub fn max_event_time(&self) -> Option<DateTime<Utc>> {
self.event_time_range.as_ref().map(|r| r.end)
}
/// Returns the beginning of the insert time range.
pub fn begin_insert_time(&self) -> DateTime<Utc> {
self.insert_time_range.begin
}
/// Returns the end of the insert time range.
pub fn end_insert_time(&self) -> DateTime<Utc> {
self.insert_time_range.end
}
/// Validates partition invariants. Returns error if partition is inconsistent.
///
/// Invariants:
/// - Non-empty partitions (num_rows > 0) MUST have both event_time_range and file_path
/// - Empty partitions (num_rows = 0) MUST NOT have event_time_range or file_path
/// - num_rows must not be negative
pub fn validate(&self) -> anyhow::Result<()> {
if self.num_rows > 0 {
// Non-empty partition must have event_time_range and file_path
if self.event_time_range.is_none() {
anyhow::bail!(
"non-empty partition (num_rows={}) has no event_time_range",
self.num_rows
);
}
if self.file_path.is_none() {
anyhow::bail!(
"non-empty partition (num_rows={}) has no file_path",
self.num_rows
);
}
} else if self.num_rows == 0 {
// Empty partition must NOT have event_time_range or file_path
if self.event_time_range.is_some() {
anyhow::bail!("empty partition has event_time_range");
}
if self.file_path.is_some() {
anyhow::bail!("empty partition has file_path");
}
} else {
anyhow::bail!("partition has negative num_rows: {}", self.num_rows);
}
Ok(())
}
}