1#![allow(clippy::module_name_repetitions)]
2
3use serde::{Deserialize, Serialize};
4use smart_default::SmartDefault;
5
6mod input;
7pub use input::{InputConfig, TitleBoost};
8
9mod output;
10pub use output::OutputConfig;
11
12mod stemming;
13pub use stemming::StemmingConfig;
14
15mod frontmatter;
16pub use self::frontmatter::FrontmatterConfig;
17
18mod file;
19pub use file::{DataSource, File, Filetype};
20
21mod srt;
22pub use srt::{SRTConfig, SRTTimestampFormat};
23
24mod errors;
25pub use errors::ConfigReadError;
26
27#[derive(Serialize, Deserialize, Debug, SmartDefault, PartialEq)]
28#[serde(deny_unknown_fields, default)]
29pub struct Config {
30 pub input: InputConfig,
31 pub output: OutputConfig,
32}
33
34impl TryFrom<&str> for Config {
35 type Error = ConfigReadError;
36
37 fn try_from(value: &str) -> Result<Self, Self::Error> {
38 if value.is_empty() {
39 return Err(ConfigReadError::EmptyString);
40 }
41
42 let toml_output = toml::from_str::<Self>(value);
43 let json_output = serde_json::from_str::<Self>(value);
44
45 match (toml_output, json_output) {
46 (Ok(toml_config), _) => Ok(toml_config),
47
48 (Err(_), Ok(json_config)) => Ok(json_config),
49
50 (Err(toml_error), Err(json_error)) => {
51 if let Some((mut toml_line, mut toml_col)) = toml_error.line_col() {
52 toml_line += 1;
53 toml_col += 1;
54 if toml_line > json_error.line()
55 || (toml_line == json_error.line() && toml_col > json_error.column())
56 {
57 Err(ConfigReadError::UnparseableTomlInput(toml_error))
58 } else {
59 Err(ConfigReadError::UnparseableJsonInput(json_error))
60 }
61 } else {
62 Err(ConfigReadError::UnparseableJsonInput(json_error))
63 }
64 }
65 }
66 }
67}
68
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use super::*;
    use pretty_assertions::assert_eq;

    /// Builds a [`File`] entry whose source is an explicit file path and
    /// whose optional overrides are all unset.
    fn federalist_file(title: &'static str, url: &'static str, path: &'static str) -> File {
        File {
            title: title.into(),
            url: url.into(),
            explicit_source: Some(DataSource::FilePath(path.into())),
            id: None,
            stemming_override: None,
            html_selector_override: None,
            exclude_html_selector_override: None,
            frontmatter_handling_override: None,
            filetype: None,
            fields: HashMap::new(),
        }
    }

    /// The [`Config`] that the TOML and JSON fixtures below are both expected
    /// to deserialize into.
    fn expected_federalist_config() -> Config {
        let files = vec![
            federalist_file(
                "Introduction",
                "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-1",
                "federalist-1.txt",
            ),
            federalist_file(
                "Concerning Dangers from Foreign Force and Influence",
                "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-2",
                "federalist-2.txt",
            ),
            federalist_file(
                "Concerning Dangers from Foreign Force and Influence 2",
                "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-3",
                "federalist-3.txt",
            ),
        ];

        let input = InputConfig {
            UNUSED_surrounding_word_count: None,
            base_directory: "test/federalist".into(),
            url_prefix: String::new(),
            title_boost: TitleBoost::Moderate,
            stemming: StemmingConfig::Language(rust_stemmers::Algorithm::English),
            html_selector: None,
            exclude_html_selector: None,
            frontmatter_handling: FrontmatterConfig::Omit,
            files,
            break_on_file_error: false,
            srt_config: SRTConfig {
                timestamp_linking: true,
                timestamp_template_string: "&t={ts}".into(),
                timestamp_format: SRTTimestampFormat::NumberOfSeconds,
            },
            minimum_indexed_substring_length: 3,
            minimum_index_ideographic_substring_length: 1,
        };

        let output = OutputConfig {
            UNUSED_filename: None,
            debug: true,
            save_nearest_html_id: false,
            excerpt_buffer: 8,
            excerpts_per_result: 5,
            displayed_results_count: 10,
        };

        Config { input, output }
    }

    /// An empty string is rejected before any parser runs.
    #[test]
    fn empty_string_via_tryfrom_returns_error() {
        let outcome = Config::try_from("");
        assert_eq!(outcome.unwrap_err(), ConfigReadError::EmptyString);
    }

    /// A well-formed TOML document deserializes into the expected config.
    #[test]
    fn simple_toml_config_is_parseable() {
        let toml_source = r#"
[input]
base_directory = "test/federalist"
files = [
 {path = "federalist-1.txt", url = "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-1", title = "Introduction"},
 {path = "federalist-2.txt", url = "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-2", title = "Concerning Dangers from Foreign Force and Influence"},
 {path = "federalist-3.txt", url = "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-3", title = "Concerning Dangers from Foreign Force and Influence 2"},
]

[output]
debug = true
 "#;

        let parsed = Config::try_from(toml_source).unwrap();

        assert_eq!(parsed, expected_federalist_config());
    }

    /// A well-formed JSON document deserializes into the same config as the
    /// TOML fixture.
    #[test]
    fn simple_json_config_is_parseable() {
        let json_source = r#"
 {
 "input": {
 "base_directory": "test/federalist",
 "files": [
 {
 "path": "federalist-1.txt",
 "url": "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-1",
 "title": "Introduction"
 },
 {
 "path": "federalist-2.txt",
 "url": "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-2",
 "title": "Concerning Dangers from Foreign Force and Influence"
 },
 {
 "path": "federalist-3.txt",
 "url": "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-3",
 "title": "Concerning Dangers from Foreign Force and Influence 2"
 }
 ]
 },
 "output": {
 "debug": true
 }
 }
 "#;

        let parsed = Config::try_from(json_source).unwrap();

        assert_eq!(parsed, expected_federalist_config());
    }

    /// Input that looks like TOML but is malformed surfaces the TOML error.
    #[test]
    fn bad_toml_syntax_fails_with_toml_error() {
        let message = Config::try_from(r#"[input] {}"#)
            .unwrap_err()
            .to_string();

        assert_eq!(
            message,
            "Cannot parse config as TOML. Stork recieved error: `expected newline, found a left brace at line 1 column 9`"
        );
    }

    /// Input that looks like JSON but is malformed surfaces the JSON error.
    #[test]
    fn bad_json_syntax_fails_with_json_error() {
        let message = Config::try_from(r#"{"input", ]}"#)
            .unwrap_err()
            .to_string();

        assert_eq!(
            message,
            "Cannot parse config as JSON. Stork recieved error: `expected `:` at line 1 column 9`"
        );
    }

    /// A `files` array holding an empty table fails on the missing `title`.
    #[test]
    fn empty_file_array_fails() {
        let toml_source = r#"
[input]
files = [{}]
 "#;

        let failure: toml::de::Error = toml::from_str::<Config>(toml_source).unwrap_err();
        let message = failure.to_string();

        assert_eq!(
            message,
            "missing field `title` for key `input.files` at line 3 column 10"
        );
    }
}