parquet_flamegraph/
lib.rs1pub mod args;
2
3use parquet::file::metadata::ParquetMetaData;
4
5pub fn parquet_column_size_to_flamegraph_format(
7 parquet_metadata: &ParquetMetaData,
8 unit: &args::Unit,
9) -> Vec<String> {
10 let converter_value = match *unit {
11 args::Unit::Bytes => 1,
12 args::Unit::KiloBytes => 1024,
13 args::Unit::MegaBytes => 1024 * 1024,
14 args::Unit::GigaBytes => 1024 * 1024 * 1024,
15 };
16
17 parquet_metadata
18 .row_groups()
19 .iter()
20 .flat_map(|row_group_metadata| {
21 row_group_metadata.columns().iter().map(|column_metadata| {
22 format!(
23 "{} {}",
24 column_metadata.column_path().string().replace(".", ";"),
25 column_metadata.compressed_size() / converter_value
26 )
27 })
28 })
29 .collect()
30}
31
#[cfg(test)]
mod tests {
    use std::{fs::File, path::Path};

    use parquet::file::reader::{FileReader, SerializedFileReader};

    use super::*;

    /// Opens `./resources/<file_name>` with a serialized Parquet reader.
    ///
    /// Panics if the file cannot be opened or the reader cannot be created.
    fn metadata_reader(file_name: &str) -> SerializedFileReader<File> {
        let location = format!("./resources/{}", &file_name);
        let handle = File::open(Path::new(&location)).unwrap();

        SerializedFileReader::new(handle).unwrap()
    }

    /// Runs the conversion under test against a fixture file and returns its lines.
    fn flamegraph_lines(file_name: &str, unit: &args::Unit) -> Vec<String> {
        let reader = metadata_reader(file_name);

        parquet_column_size_to_flamegraph_format(reader.metadata(), unit)
    }

    #[test]
    fn test_parquet_column_size_to_flamegraph_format_with_nested_paths() {
        // Nested columns must come out as `;`-separated stacks.
        let expected = [
            "a;key_value;key 69",
            "a;key_value;value;key_value;key 95",
            "a;key_value;value;key_value;value 50",
            "b 56",
            "c 68",
        ];

        let actual = flamegraph_lines("nested_maps.snappy.parquet", &args::Unit::Bytes);

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_parquet_column_size_to_flamegraph_format_with_multiple_row_groups() {
        // Every row group contributes its own line per column.
        let expected = ["a 104", "b 70", "a 104", "b 70"];

        let actual = flamegraph_lines("sort_columns.parquet", &args::Unit::Bytes);

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_parquet_column_size_to_flamegraph_format_with_kilobytes() {
        // Sizes are expressed in whole kilobytes.
        let expected = [
            "c_customer_sk: 0",
            "c_current_cdemo_sk: 0",
            "c_current_hdemo_sk: 0",
            "c_current_addr_sk: 0",
            "c_first_shipto_date_sk: 0",
            "c_first_sales_date_sk: 0",
            "c_birth_day: 0",
            "c_birth_month: 0",
            "c_birth_year: 0",
            "c_customer_id: 0",
            "c_salutation: 0",
            "c_first_name: 0",
            "c_last_name: 0",
            "c_preferred_cust_flag: 0",
            "c_birth_country: 1",
            "c_email_address: 2",
            "c_last_review_date: 0",
        ];

        let actual = flamegraph_lines(
            "delta_encoding_required_column.parquet",
            &args::Unit::KiloBytes,
        );

        assert_eq!(actual, expected);
    }
}