datafusion_delta_sharing/client/
action.rs

1//! Basic types for describing table data and metadata
2
3use std::collections::HashMap;
4
5use serde::Deserialize;
6
/// Representation of the table protocol.
///
/// Field names are mapped from camelCase on deserialization, so
/// `min_reader_version` is read from the `minReaderVersion` key.
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Protocol {
    /// The minimum version of the protocol that the client must support.
    min_reader_version: u32,
}
14
15impl Protocol {
16    /// Retrieve the minimum version of the protocol that the client must
17    /// implement to read this table.
18    pub fn min_reader_version(&self) -> u32 {
19        self.min_reader_version
20    }
21}
22
23impl Default for Protocol {
24    fn default() -> Self {
25        Self {
26            min_reader_version: 1,
27        }
28    }
29}
30
/// Representation of the table format.
///
/// Field names are mapped from camelCase on deserialization.
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Format {
    /// The format of the data files backing the shared table.
    provider: String,
    /// Optional key/value options attached to the format; `None` when no
    /// options were supplied.
    options: Option<HashMap<String, String>>,
}
39
40impl Format {
41    /// Retrieve the format provider.
42    pub fn provider(&self) -> &str {
43        self.provider.as_ref()
44    }
45
46    /// Retrieve the format options.
47    pub fn options(&self) -> Option<&HashMap<String, String>> {
48        self.options.as_ref()
49    }
50}
51
52impl Default for Format {
53    fn default() -> Self {
54        Self {
55            provider: String::from("parquet"),
56            options: None,
57        }
58    }
59}
60
/// Representation of the table metadata.
///
/// The metadata of a table contains all the information required to correctly
/// interpret the data files of the table.
///
/// Field names are mapped from camelCase on deserialization (for example,
/// `schema_string` is read from `schemaString`).
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
    /// Unique identifier of the table.
    id: String,
    /// Table name provided by the user, if any.
    name: Option<String>,
    /// Table description provided by the user, if any.
    description: Option<String>,
    /// Specification of the table format.
    format: Format,
    /// Schema of the table, serialized as a string.
    schema_string: String,
    /// Names of the columns used to partition the table.
    partition_columns: Vec<String>,
    /// Configuration options for the table; an empty map is used when the
    /// key is absent from the input.
    #[serde(default)]
    configuration: HashMap<String, String>,
    /// Version of the table this metadata corresponds to, if present.
    // NOTE(review): stored as a string here; the Delta Sharing protocol
    // appears to describe this value as a number — confirm against actual
    // server responses before relying on it.
    version: Option<String>,
    /// Size of the table in bytes, if present.
    size: Option<u64>,
    /// Number of files in the table, if present.
    num_files: Option<u64>,
}
80
81impl Metadata {
82    /// Retrieve the unique table identifier.
83    pub fn id(&self) -> &str {
84        &self.id
85    }
86
87    /// Retrieve the table name provided by the user.
88    pub fn name(&self) -> Option<&str> {
89        self.name.as_deref()
90    }
91
92    /// Retrieve the table description provided by the user.
93    pub fn description(&self) -> Option<&str> {
94        self.description.as_deref()
95    }
96
97    /// Retrieve the specification of the table format.
98    pub fn format(&self) -> &Format {
99        &self.format
100    }
101
102    /// Retrieve the schema of the table, serialized as a string.
103    pub fn schema_string(&self) -> &str {
104        &self.schema_string
105    }
106
107    /// Retrieve an array of column names that are used to partition the table.
108    pub fn partition_columns(&self) -> &[String] {
109        &self.partition_columns
110    }
111
112    /// Retrieve a map containing configuration options for the table.
113    pub fn configuration(&self) -> &HashMap<String, String> {
114        &self.configuration
115    }
116
117    /// Retrieve the version of the table this metadata corresponds to.
118    pub fn version(&self) -> Option<&str> {
119        self.version.as_deref()
120    }
121
122    /// Retrieve the size of the table in bytes.
123    pub fn size(&self) -> Option<u64> {
124        self.size
125    }
126
127    /// Retrieve the number of files in the table.
128    pub fn num_files(&self) -> Option<u64> {
129        self.num_files
130    }
131}
132
133/// Representation of data that is part of a table.
134#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
135#[serde(rename_all = "camelCase")]
136pub struct File {
137    url: String,
138    id: String,
139    partition_values: HashMap<String, Option<String>>,
140    size: u64,
141    #[serde(skip_serializing_if = "Option::is_none")]
142    stats: Option<String>,
143    #[serde(skip_serializing_if = "Option::is_none")]
144    version: Option<u64>,
145    #[serde(skip_serializing_if = "Option::is_none")]
146    timestamp: Option<u64>,
147    #[serde(skip_serializing_if = "Option::is_none")]
148    expiration_timestamp: Option<u64>,
149}
150
151impl File {
152    /// An HTTPS url that a client can use to directly read the data file.
153    pub fn url(&self) -> &str {
154        self.url.as_ref()
155    }
156
157    /// A mutable HTTPS url that a client can use to directly read the data file.
158    pub fn url_mut(&mut self) -> &mut String {
159        &mut self.url
160    }
161
162    /// A unique identifier for the data file in the table.
163    pub fn id(&self) -> &str {
164        self.id.as_ref()
165    }
166
167    /// A map from partition column to value for this file in the table.
168    pub fn partition_values(&self) -> HashMap<String, String> {
169        self.partition_values
170            .iter()
171            .map(|(k, v)| (k.clone(), v.clone().unwrap_or_default()))
172            .collect()
173    }
174
175    /// The size of this file in bytes.
176    pub fn size(&self) -> u64 {
177        self.size
178    }
179
180    /// Summary statistics about the data in this file.
181    pub fn stats(&self) -> Option<&str> {
182        self.stats.as_deref()
183    }
184
185    /// The table version associated with this file.
186    pub fn version(&self) -> Option<u64> {
187        self.version
188    }
189
190    /// The unix timestamp in milliseconds corresponding to the table version
191    /// associated with this file.
192    pub fn timestamp(&self) -> Option<u64> {
193        self.timestamp
194    }
195
196    /// The unix timestamp in milliseconds corresponding to the expiration of
197    /// the url associated with this file.
198    pub fn expiration_timestamp(&self) -> Option<u64> {
199        self.expiration_timestamp
200    }
201}