url_cleaner_engine/types/cleaner.rs
1//! The configuration for how a URL should be cleaned.
2
3use std::fs::read_to_string;
4use std::path::Path;
5use std::borrow::Cow;
6use std::io;
7#[cfg(feature = "default-cleaner")]
8use std::sync::OnceLock;
9
10use serde::{Serialize, Deserialize};
11use thiserror::Error;
12
13use crate::types::*;
14use crate::glue::*;
15use crate::testing::*;
16use crate::util::*;
17
18pub mod params;
19pub use params::*;
20pub mod docs;
21pub use docs::*;
22pub mod common_call;
23pub use common_call::*;
24pub mod commons;
25pub use commons::*;
26
27/// The config that determines all behavior of how URLs are cleaned.
28#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize, Serialize, Suitability)]
29pub struct Cleaner {
30 /// The documentation.
31 ///
32 /// Defaults to an empty [`CleanerDocs`].
33 #[serde(default, skip_serializing_if = "is_default")]
34 pub docs: CleanerDocs,
35 /// The location of the cache.
36 ///
37 /// Defaults to being stored in memory and destroyed on program exit.
38 #[cfg(feature = "cache")]
39 #[serde(default, skip_serializing_if = "is_default")]
40 pub cache_path: CachePath,
41 /// Fine tuning shared between all [`Task`]s of a [`Job`] and maybe multiple [`Job`]s.
42 ///
43 /// Defaults to an empty [`Params`].
44 #[serde(default, skip_serializing_if = "is_default")]
45 pub params: Params,
46 /// Basically functions.
47 ///
48 /// Defaults to an empty [`Commons`].
49 #[serde(default, skip_serializing_if = "is_default")]
50 pub commons: Commons,
51 /// The [`Action`]s to apply.
52 ///
53 /// Defaults to an empty [`Vec`].
54 #[serde(default, skip_serializing_if = "is_default")]
55 pub actions: Vec<Action>
56}
57
58impl Cleaner {
59 /// Load [`Self`] from a JSON file.
60 /// # Errors
61 /// If the call to [`std::fs::read_to_string`] returns an error, that error is returned.
62 ///
63 /// If the call to [`serde_json::from_str`] returns an error, that error is returned.
64 pub fn load_from_file<T: AsRef<Path>>(path: T) -> Result<Self, GetCleanerError> {
65 serde_json::from_str(&read_to_string(path)?).map_err(Into::into)
66 }
67
68 /// Gets the default [`Self`] compiled into the binary itself.
69 ///
70 /// Caching is done by putting the [`Self`] in [`DEFAULT_CLEANER`] and only returning references to it.
71 ///
72 /// If you know you're only going to get the default config once, [`Self::get_default_no_cache`] is better because you can apply [`ParamsDiff`]s to it without [`Clone::clone`]ing.
73 /// # Errors
74 /// If the call to [`Self::get_default_no_cache`] returns an error, that error is returned.
75 /// # Examples
76 /// ```
77 /// use url_cleaner_engine::types::*;
78 ///
79 /// Cleaner::get_default().unwrap();
80 /// ```
81 #[allow(dead_code, reason = "Public API.")]
82 #[cfg(feature = "default-cleaner")]
83 pub fn get_default() -> Result<&'static Self, GetCleanerError> {
84 if let Some(config) = DEFAULT_CLEANER.get() {
85 Ok(config)
86 } else {
87 let config = Self::get_default_no_cache()?;
88 Ok(DEFAULT_CLEANER.get_or_init(|| config))
89 }
90 }
91
92 /// Deserializes [`DEFAULT_CLEANER_STR`] and returns it without caching in [`DEFAULT_CLEANER`]
93 ///
94 /// If you're getting the default config often and rarely using [`ParamsDiff`]s, [`Self::get_default`] may be better due to it only deserializing the config once.
95 /// # Errors
96 /// If the call to [`serde_json::from_str`] returns an error, that error is returned.
97 /// # Examples
98 /// ```
99 /// use url_cleaner_engine::types::*;
100 ///
101 /// Cleaner::get_default_no_cache().unwrap();
102 /// ```
103 #[cfg(feature = "default-cleaner")]
104 pub fn get_default_no_cache() -> Result<Self, GetCleanerError> {
105 serde_json::from_str(DEFAULT_CLEANER_STR).map_err(Into::into)
106 }
107
108 /// If `path` is [`Some`], returns the result of [`Self::load_from_file`] in a [`Cow::Owned`].
109 ///
110 /// If `path` is [`None`], returns the result of [`Self::get_default`] in a [`Cow::Borrowed`].
111 /// # Errors
112 /// If the call to [`Self::load_from_file`] returns an error, that error is returned.
113 ///
114 /// If the call to [`Self::get_default`] returns an error, that error is returned.
115 /// # Examples
116 /// ```
117 /// use url_cleaner_engine::types::*;
118 ///
119 /// assert_eq!(
120 /// Cleaner::get_default().unwrap(),
121 /// &*Cleaner::load_or_get_default(None::<&str>).unwrap()
122 /// );
123 ///
124 /// assert_eq!(
125 /// Cleaner::get_default().unwrap(),
126 /// &*Cleaner::load_or_get_default(Some("default-cleaner.json")).unwrap()
127 /// );
128 /// ```
129 #[allow(dead_code, reason = "Public API.")]
130 #[cfg(feature = "default-cleaner")]
131 pub fn load_or_get_default<T: AsRef<Path>>(path: Option<T>) -> Result<Cow<'static, Self>, GetCleanerError> {
132 Ok(match path {
133 Some(path) => Cow::Owned(Self::load_from_file(path)?),
134 None => Cow::Borrowed(Self::get_default()?)
135 })
136 }
137
138 /// If `path` is [`Some`], returns the result of [`Self::load_from_file`].
139 ///
140 /// If `path` is [`None`], returns the result of [`Self::get_default_no_cache`].
141 /// # Errors
142 /// If the call to [`Self::load_from_file`] returns an error, that error is returned.
143 ///
144 /// If the call to [`Self::get_default`] returns an error, that error is returned.
145 /// # Examples
146 /// ```
147 /// use url_cleaner_engine::types::*;
148 ///
149 /// assert_eq!(
150 /// Cleaner::get_default_no_cache().unwrap(),
151 /// Cleaner::load_or_get_default_no_cache(None::<&str>).unwrap()
152 /// );
153 ///
154 /// assert_eq!(
155 /// Cleaner::get_default_no_cache().unwrap(),
156 /// Cleaner::load_or_get_default_no_cache(Some("default-cleaner.json")).unwrap()
157 /// );
158 /// ```
159 #[cfg(feature = "default-cleaner")]
160 pub fn load_or_get_default_no_cache<T: AsRef<Path>>(path: Option<T>) -> Result<Self, GetCleanerError> {
161 Ok(match path {
162 Some(path) => Self::load_from_file(path)?,
163 None => Self::get_default_no_cache()?
164 })
165 }
166
167 /// Applies each [`Action`] in [`Self::actions`] in order to the provided [`TaskState`].
168 ///
169 /// If an error is returned, `job_state` may be left in a partially modified state.
170 /// # Errors
171 /// If any call to [`Action::apply`] returns an error, that error is returned.
172 pub fn apply(&self, job_state: &mut TaskState) -> Result<(), ApplyCleanerError> {
173 for action in &self.actions {
174 action.apply(job_state)?;
175 }
176 Ok(())
177 }
178
179 /// Runs the provided [`Tests`], panicking if any of them fail.
180 /// # Panics
181 /// If any [`Test`] fails, panics.
182 pub fn run_tests(&self, tests: Tests) {
183 tests.r#do(self);
184 }
185
186 /// Asserts the suitability of `self` to be URL Cleaner's default config.
187 ///
188 /// Exact behavior is unspecified and changes are not considered breaking.
189 /// # Panics
190 /// If `self` is deemed unsuitable to be URL Cleaner's default config, panics.
191 #[cfg_attr(feature = "default-cleaner", doc = "# Examples")]
192 #[cfg_attr(feature = "default-cleaner", doc = "```")]
193 #[cfg_attr(feature = "default-cleaner", doc = "use url_cleaner_engine::types::*;")]
194 #[cfg_attr(feature = "default-cleaner", doc = "")]
195 #[cfg_attr(feature = "default-cleaner", doc = "Cleaner::get_default().unwrap().assert_suitability();")]
196 #[cfg_attr(feature = "default-cleaner", doc = "```")]
197 pub fn assert_suitability(&self) {
198 Suitability::assert_suitability(self, self)
199 }
200}
201
202/// The enum of errors [`Cleaner::apply`] can return.
203#[derive(Debug, Error)]
204pub enum ApplyCleanerError {
205 /// Returned when a [`ActionError`] is encountered.
206 #[error(transparent)]
207 ActionError(#[from] ActionError)
208}
209
/// The JSON text of the default config.
///
/// Outside of test builds this embeds the `default-cleaner.json.minified` file found in `OUT_DIR`.
#[cfg(all(feature = "default-cleaner", not(test)))]
pub const DEFAULT_CLEANER_STR: &str = include_str!(concat!(env!("OUT_DIR"), "/default-cleaner.json.minified"));
/// The JSON text of the default config.
///
/// In test builds this embeds `default-cleaner.json` directly, through a relative path.
#[cfg(all(feature = "default-cleaner", test))]
pub const DEFAULT_CLEANER_STR: &str = include_str!("../../default-cleaner.json");
/// The cached deserialization of the default config.
///
/// Starts out empty; [`Cleaner::get_default`] fills it the first time deserialization succeeds.
#[cfg(feature = "default-cleaner")]
#[allow(dead_code, reason = "Public API.")]
pub static DEFAULT_CLEANER: OnceLock<Cleaner> = OnceLock::new();
220
221/// The enum of errors that can happen when loading a [`Cleaner`].
222#[derive(Debug, Error)]
223pub enum GetCleanerError {
224 /// Returned when loading a [`Cleaner`] fails.
225 #[error(transparent)]
226 CantLoadCleaner(#[from] io::Error),
227 /// Returned when deserializing a [`Cleaner`] fails.
228 #[error(transparent)]
229 CantParseCleaner(#[from] serde_json::Error),
230}