datafusion_datasource_orc/
options.rs1use std::collections::HashMap;
25
26use datafusion_common::{DataFusionError, Result};
27
/// Read-side settings for ORC data.
///
/// Every field has a matching `with_*` builder method, and can also be set
/// from string key/value pairs through `OrcFormatOptions::apply_format_options`.
#[derive(Debug, Clone)]
pub struct OrcReadOptions {
    /// Optional batch size (string key `orc.batch_size`); unset by default.
    ///
    /// NOTE(review): presumably the number of rows per record batch — confirm
    /// against the reader that consumes this value.
    pub batch_size: Option<usize>,
    /// Predicate pushdown toggle (string key `orc.pushdown_predicate`);
    /// enabled by default.
    pub pushdown_predicate: bool,
    /// Optional metadata size hint (string key `orc.metadata_size_hint`);
    /// unset by default.
    ///
    /// NOTE(review): presumably a byte count — confirm against the reader
    /// that consumes this value.
    pub metadata_size_hint: Option<usize>,
}
38
39impl Default for OrcReadOptions {
40 fn default() -> Self {
41 Self {
42 batch_size: None,
43 pushdown_predicate: true,
44 metadata_size_hint: None,
45 }
46 }
47}
48
49impl OrcReadOptions {
50 pub fn with_batch_size(mut self, batch_size: usize) -> Self {
52 self.batch_size = Some(batch_size);
53 self
54 }
55
56 pub fn with_pushdown_predicate(mut self, pushdown_predicate: bool) -> Self {
58 self.pushdown_predicate = pushdown_predicate;
59 self
60 }
61
62 pub fn with_metadata_size_hint(mut self, metadata_size_hint: usize) -> Self {
64 self.metadata_size_hint = Some(metadata_size_hint);
65 self
66 }
67}
68
69#[derive(Clone, Debug, Default)]
71pub struct OrcFormatOptions {
72 pub read: OrcReadOptions,
74 }
76
77impl OrcFormatOptions {
78 pub fn apply_format_options(&mut self, format_options: &HashMap<String, String>) -> Result<()> {
80 for (key, value) in format_options {
81 match key.as_str() {
82 "orc.batch_size" => {
83 self.read.batch_size = Some(parse_usize_option(key, value)?);
84 }
85 "orc.pushdown_predicate" => {
86 self.read.pushdown_predicate = parse_bool_option(key, value)?;
87 }
88 "orc.metadata_size_hint" => {
89 self.read.metadata_size_hint = Some(parse_usize_option(key, value)?);
90 }
91 _ => {
92 }
94 }
95 }
96 Ok(())
97 }
98}
99
100fn parse_bool_option(key: &str, value: &str) -> Result<bool> {
101 value.parse::<bool>().map_err(|_| {
102 DataFusionError::Configuration(format!(
103 "Invalid value for {key}: {value}. Expected true or false."
104 ))
105 })
106}
107
108fn parse_usize_option(key: &str, value: &str) -> Result<usize> {
109 value.parse::<usize>().map_err(|_| {
110 DataFusionError::Configuration(format!(
111 "Invalid value for {key}: {value}. Expected a positive integer."
112 ))
113 })
114}
115
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned option map from borrowed key/value pairs.
    fn option_map(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect()
    }

    #[test]
    fn test_orc_read_options_default() {
        let OrcReadOptions {
            batch_size,
            pushdown_predicate,
            metadata_size_hint,
        } = OrcReadOptions::default();

        assert_eq!(batch_size, None);
        assert!(pushdown_predicate);
        assert_eq!(metadata_size_hint, None);
    }

    #[test]
    fn test_orc_read_options_builder() {
        let built = OrcReadOptions::default()
            .with_batch_size(4096)
            .with_pushdown_predicate(false)
            .with_metadata_size_hint(1024);

        assert_eq!(built.batch_size, Some(4096));
        assert!(!built.pushdown_predicate);
        assert_eq!(built.metadata_size_hint, Some(1024));
    }

    #[test]
    fn test_orc_format_options_default() {
        let read = OrcFormatOptions::default().read;
        assert_eq!(read.batch_size, None);
        assert!(read.pushdown_predicate);
    }

    #[test]
    fn test_apply_format_options_batch_size() {
        let mut options = OrcFormatOptions::default();
        let format_opts = option_map(&[("orc.batch_size", "8192")]);

        options.apply_format_options(&format_opts).unwrap();
        assert_eq!(options.read.batch_size, Some(8192));
    }

    #[test]
    fn test_apply_format_options_pushdown_predicate() {
        let mut options = OrcFormatOptions::default();
        let format_opts = option_map(&[("orc.pushdown_predicate", "false")]);

        options.apply_format_options(&format_opts).unwrap();
        assert!(!options.read.pushdown_predicate);
    }

    #[test]
    fn test_apply_format_options_metadata_size_hint() {
        let mut options = OrcFormatOptions::default();
        let format_opts = option_map(&[("orc.metadata_size_hint", "1048576")]);

        options.apply_format_options(&format_opts).unwrap();
        assert_eq!(options.read.metadata_size_hint, Some(1048576));
    }

    #[test]
    fn test_apply_format_options_invalid_batch_size() {
        let mut options = OrcFormatOptions::default();
        let format_opts = option_map(&[("orc.batch_size", "not_a_number")]);

        // `unwrap_err` panics on `Ok`, so it also covers the "must fail" check.
        let message = options
            .apply_format_options(&format_opts)
            .unwrap_err()
            .to_string();
        assert!(message.contains("Invalid value for orc.batch_size"));
    }

    #[test]
    fn test_apply_format_options_invalid_bool() {
        let mut options = OrcFormatOptions::default();
        let format_opts = option_map(&[("orc.pushdown_predicate", "maybe")]);

        let message = options
            .apply_format_options(&format_opts)
            .unwrap_err()
            .to_string();
        assert!(message.contains("Invalid value for orc.pushdown_predicate"));
    }

    #[test]
    fn test_apply_format_options_multiple() {
        let mut options = OrcFormatOptions::default();
        let format_opts = option_map(&[
            ("orc.batch_size", "16384"),
            ("orc.pushdown_predicate", "true"),
            ("orc.metadata_size_hint", "2097152"),
        ]);

        options.apply_format_options(&format_opts).unwrap();

        let read = &options.read;
        assert_eq!(read.batch_size, Some(16384));
        assert!(read.pushdown_predicate);
        assert_eq!(read.metadata_size_hint, Some(2097152));
    }

    #[test]
    fn test_apply_format_options_unknown_key() {
        let mut options = OrcFormatOptions::default();
        let format_opts = option_map(&[("orc.unknown_option", "value")]);

        // Keys the format does not recognize are accepted without error.
        assert!(options.apply_format_options(&format_opts).is_ok());
    }
}