stork_lib/config/
mod.rs

1#![allow(clippy::module_name_repetitions)]
2
3use serde::{Deserialize, Serialize};
4use smart_default::SmartDefault;
5
6mod input;
7pub use input::{InputConfig, TitleBoost};
8
9mod output;
10pub use output::OutputConfig;
11
12mod stemming;
13pub use stemming::StemmingConfig;
14
15mod frontmatter;
16pub use self::frontmatter::FrontmatterConfig;
17
18mod file;
19pub use file::{DataSource, File, Filetype};
20
21mod srt;
22pub use srt::{SRTConfig, SRTTimestampFormat};
23
24mod errors;
25pub use errors::ConfigReadError;
26
27#[derive(Serialize, Deserialize, Debug, SmartDefault, PartialEq)]
28#[serde(deny_unknown_fields, default)]
29pub struct Config {
30    pub input: InputConfig,
31    pub output: OutputConfig,
32}
33
34impl TryFrom<&str> for Config {
35    type Error = ConfigReadError;
36
37    fn try_from(value: &str) -> Result<Self, Self::Error> {
38        if value.is_empty() {
39            return Err(ConfigReadError::EmptyString);
40        }
41
42        let toml_output = toml::from_str::<Self>(value);
43        let json_output = serde_json::from_str::<Self>(value);
44
45        match (toml_output, json_output) {
46            (Ok(toml_config), _) => Ok(toml_config),
47
48            (Err(_), Ok(json_config)) => Ok(json_config),
49
50            (Err(toml_error), Err(json_error)) => {
51                if let Some((mut toml_line, mut toml_col)) = toml_error.line_col() {
52                    toml_line += 1;
53                    toml_col += 1;
54                    if toml_line > json_error.line()
55                        || (toml_line == json_error.line() && toml_col > json_error.column())
56                    {
57                        Err(ConfigReadError::UnparseableTomlInput(toml_error))
58                    } else {
59                        Err(ConfigReadError::UnparseableJsonInput(json_error))
60                    }
61                } else {
62                    Err(ConfigReadError::UnparseableJsonInput(json_error))
63                }
64            }
65        }
66    }
67}
68
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use super::*;
    use pretty_assertions::assert_eq;

    /// Builds the `File` that the fixtures below are expected to produce for
    /// an entry that sets only `path`, `url`, and `title`. Every remaining
    /// field is written out explicitly rather than taken from a default.
    fn expected_file(title: &'static str, url: &'static str, path: &'static str) -> File {
        File {
            title: title.into(),
            url: url.into(),
            explicit_source: Some(DataSource::FilePath(path.into())),
            id: None,
            stemming_override: None,
            html_selector_override: None,
            exclude_html_selector_override: None,
            frontmatter_handling_override: None,
            filetype: None,
            fields: HashMap::new(),
        }
    }

    /// The `Config` that both the TOML fixture and the JSON fixture are
    /// expected to parse into.
    fn get_default_config() -> Config {
        let files = vec![
            expected_file(
                "Introduction",
                "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-1",
                "federalist-1.txt",
            ),
            expected_file(
                "Concerning Dangers from Foreign Force and Influence",
                "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-2",
                "federalist-2.txt",
            ),
            expected_file(
                "Concerning Dangers from Foreign Force and Influence 2",
                "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-3",
                "federalist-3.txt",
            ),
        ];

        let srt_config = SRTConfig {
            timestamp_linking: true,
            timestamp_template_string: "&t={ts}".into(),
            timestamp_format: SRTTimestampFormat::NumberOfSeconds,
        };

        let input = InputConfig {
            UNUSED_surrounding_word_count: None,
            base_directory: "test/federalist".into(),
            url_prefix: String::new(),
            title_boost: TitleBoost::Moderate,
            stemming: StemmingConfig::Language(rust_stemmers::Algorithm::English),
            html_selector: None,
            exclude_html_selector: None,
            frontmatter_handling: FrontmatterConfig::Omit,
            files,
            break_on_file_error: false,
            srt_config,
            minimum_indexed_substring_length: 3,
            minimum_index_ideographic_substring_length: 1,
        };

        let output = OutputConfig {
            UNUSED_filename: None,
            debug: true,
            save_nearest_html_id: false,
            excerpt_buffer: 8,
            excerpts_per_result: 5,
            displayed_results_count: 10,
        };

        Config { input, output }
    }

    /// An empty input is rejected before either parser runs.
    #[test]
    fn empty_string_via_tryfrom_returns_error() {
        let result = Config::try_from("");
        assert_eq!(result.unwrap_err(), ConfigReadError::EmptyString);
    }

    // This test doubles as a guard on the default values: the expected
    // config lists every field, so a changed default makes it fail until the
    // tests are updated to match.
    #[test]
    fn simple_toml_config_is_parseable() {
        let contents = r#"
[input]
base_directory = "test/federalist"
files = [
    {path = "federalist-1.txt", url = "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-1", title = "Introduction"},
    {path = "federalist-2.txt", url = "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-2", title = "Concerning Dangers from Foreign Force and Influence"},
    {path = "federalist-3.txt", url = "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-3", title = "Concerning Dangers from Foreign Force and Influence 2"},
]

[output]
debug = true
    "#;

        let parsed = Config::try_from(contents).unwrap();

        assert_eq!(parsed, get_default_config());
    }

    /// The JSON spelling of the same fixture parses to the same `Config`.
    #[test]
    fn simple_json_config_is_parseable() {
        let contents = r#"
        {
            "input": {
                "base_directory": "test/federalist",
                "files": [
                    {
                        "path": "federalist-1.txt",
                        "url": "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-1",
                        "title": "Introduction"
                    },
                    {
                        "path": "federalist-2.txt",
                        "url": "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-2",
                        "title": "Concerning Dangers from Foreign Force and Influence"
                    },
                    {
                        "path": "federalist-3.txt",
                        "url": "https://www.congress.gov/resources/display/content/The+Federalist+Papers#TheFederalistPapers-3",
                        "title": "Concerning Dangers from Foreign Force and Influence 2"
                    }
                ]
            },
            "output": {
                "debug": true
            }
        }
    "#;

        let parsed = Config::try_from(contents).unwrap();

        assert_eq!(parsed, get_default_config());
    }

    /// Input that breaks late as TOML and early as JSON reports the TOML
    /// error.
    #[test]
    fn bad_toml_syntax_fails_with_toml_error() {
        let message = Config::try_from("[input] {}").unwrap_err().to_string();

        // The expected text must match the rendered error exactly, spelling
        // included.
        assert_eq!(
            message,
            "Cannot parse config as TOML. Stork recieved error: `expected newline, found a left brace at line 1 column 9`"
        );
    }

    /// Input that breaks late as JSON and early as TOML reports the JSON
    /// error.
    #[test]
    fn bad_json_syntax_fails_with_json_error() {
        let message = Config::try_from(r#"{"input", ]}"#)
            .unwrap_err()
            .to_string();

        // The expected text must match the rendered error exactly, spelling
        // included.
        assert_eq!(
            message,
            "Cannot parse config as JSON. Stork recieved error: `expected `:` at line 1 column 9`"
        );
    }

    /// A file entry with no keys is rejected by the TOML deserializer because
    /// `title` is missing.
    #[test]
    fn empty_file_array_fails() {
        let contents = r#"
[input]
files = [{}]
    "#;

        let message = toml::from_str::<Config>(contents)
            .unwrap_err()
            .to_string();

        // TODO: Can this be nicer?
        assert_eq!(
            message,
            "missing field `title` for key `input.files` at line 3 column 10"
        );
    }
}