url_cleaner_engine/types/cleaner.rs
1//! The configuration for how a URL should be cleaned.
2
3use std::fs::read_to_string;
4use std::path::Path;
5use std::borrow::Cow;
6use std::io;
7#[cfg(feature = "default-cleaner")]
8use std::sync::OnceLock;
9
10use serde::{Serialize, Deserialize};
11use thiserror::Error;
12
13use crate::types::*;
14use crate::testing::*;
15use crate::util::*;
16
17pub mod params;
18pub use params::*;
19pub mod docs;
20pub use docs::*;
21pub mod common_call;
22pub use common_call::*;
23pub mod commons;
24pub use commons::*;
25
26/// The config that determines all behavior of how URLs are cleaned.
27#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize, Serialize, Suitability)]
28#[serde(deny_unknown_fields)]
29pub struct Cleaner<'a> {
30 /// The documentation.
31 ///
32 /// Defaults to an empty [`CleanerDocs`].
33 #[serde(default, skip_serializing_if = "is_default")]
34 pub docs: Cow<'a, CleanerDocs>,
35 /// Tuning shared between all [`Task`]s spawned with this [`Cleaner`].
36 ///
37 /// Defaults to an empty [`Params`].
38 #[serde(default, skip_serializing_if = "is_default")]
39 pub params: Params<'a>,
40 /// Basically functions.
41 ///
42 /// Defaults to an empty [`Commons`].
43 #[serde(default, skip_serializing_if = "is_default")]
44 pub commons: Cow<'a, Commons>,
45 /// The [`Action`]s to apply.
46 ///
47 /// Defaults to an empty [`Vec`].
48 #[serde(default, skip_serializing_if = "is_default")]
49 pub actions: Cow<'a, [Action]>
50}
51
52impl<'a> Cleaner<'a> {
53 /// Create a new [`Self`] that [`Cow::Borrowed`]s all fields.
54 ///
55 /// Basically a very cheap [`Clone`] that you can apply [`ParamsDiff`]s to.
56 pub fn borrowed(&'a self) -> Self {
57 Self {
58 docs : Cow::Borrowed(&*self.docs),
59 params : self.params.borrowed(),
60 commons: Cow::Borrowed(&*self.commons),
61 actions: Cow::Borrowed(&*self.actions)
62 }
63 }
64
65 /// Load [`Self`] from a JSON file.
66 /// # Errors
67 #[doc = edoc!(callerr(std::fs::read_to_string), callerr(serde_json::from_str))]
68 pub fn load_from_file<T: AsRef<Path>>(path: T) -> Result<Cleaner<'static>, GetCleanerError> {
69 serde_json::from_str(&read_to_string(path)?).map_err(Into::into)
70 }
71
72 /// Gets the default [`Self`] compiled into the binary itself.
73 ///
74 /// If you know you're only going to get the default config once, [`Self::get_default_no_cache`] is better because you can apply [`ParamsDiff`]s to it without [`Clone::clone`]ing.
75 /// # Errors
76 #[doc = edoc!(callerr(Self::get_default_no_cache))]
77 /// If the call to [`Self::get_default_no_cache`] returns an error, that error is returned.
78 /// # Examples
79 /// ```
80 /// use url_cleaner_engine::types::*;
81 ///
82 /// Cleaner::get_default().unwrap();
83 /// ```
84 #[cfg(feature = "default-cleaner")]
85 pub fn get_default() -> Result<&'static Cleaner<'static>, GetCleanerError> {
86 if let Some(config) = DEFAULT_CLEANER.get() {
87 Ok(config)
88 } else {
89 let config = Self::get_default_no_cache()?;
90 Ok(DEFAULT_CLEANER.get_or_init(|| config))
91 }
92 }
93
94 /// Deserializes [`DEFAULT_CLEANER_STR`] and returns it without caching.
95 ///
96 /// If you're getting the default config often and rarely using [`ParamsDiff`]s, [`Self::get_default`] may be better due to it only deserializing the config once.
97 /// # Errors
98 #[doc = edoc!(callerr(serde_json::from_str))]
99 /// # Examples
100 /// ```
101 /// use url_cleaner_engine::types::*;
102 ///
103 /// Cleaner::get_default_no_cache().unwrap();
104 /// ```
105 #[cfg(feature = "default-cleaner")]
106 pub fn get_default_no_cache() -> Result<Cleaner<'static>, GetCleanerError> {
107 serde_json::from_str(DEFAULT_CLEANER_STR).map_err(Into::into)
108 }
109
110 /// If `path` is [`Some`], returns the result of [`Self::load_from_file`] in a [`Cow::Owned`].
111 ///
112 /// If `path` is [`None`], returns the result of [`Self::get_default`] in a [`Cow::Borrowed`].
113 /// # Errors
114 #[doc = edoc!(callerr(Self::load_from_file), callerr(Self::get_default))]
115 /// # Examples
116 /// ```
117 /// use url_cleaner_engine::types::*;
118 ///
119 /// assert_eq!(
120 /// Cleaner::get_default().unwrap(),
121 /// &*Cleaner::load_or_get_default(None::<&str>).unwrap()
122 /// );
123 ///
124 /// assert_eq!(
125 /// Cleaner::get_default().unwrap(),
126 /// &*Cleaner::load_or_get_default(Some("default-cleaner.json")).unwrap()
127 /// );
128 /// ```
129 #[cfg(feature = "default-cleaner")]
130 pub fn load_or_get_default<T: AsRef<Path>>(path: Option<T>) -> Result<Cow<'static, Self>, GetCleanerError> {
131 Ok(match path {
132 Some(path) => Cow::Owned(Self::load_from_file(path)?),
133 None => Cow::Borrowed(Self::get_default()?)
134 })
135 }
136
137 /// If `path` is [`Some`], returns the result of [`Self::load_from_file`].
138 ///
139 /// If `path` is [`None`], returns the result of [`Self::get_default_no_cache`].
140 /// # Errors
141 #[doc = edoc!(callerr(Self::load_from_file), callerr(Self::get_default_no_cache))]
142 /// # Examples
143 /// ```
144 /// use url_cleaner_engine::types::*;
145 ///
146 /// assert_eq!(
147 /// Cleaner::get_default_no_cache().unwrap(),
148 /// Cleaner::load_or_get_default_no_cache(None::<&str>).unwrap()
149 /// );
150 ///
151 /// assert_eq!(
152 /// Cleaner::get_default_no_cache().unwrap(),
153 /// Cleaner::load_or_get_default_no_cache(Some("default-cleaner.json")).unwrap()
154 /// );
155 /// ```
156 #[cfg(feature = "default-cleaner")]
157 pub fn load_or_get_default_no_cache<T: AsRef<Path>>(path: Option<T>) -> Result<Self, GetCleanerError> {
158 Ok(match path {
159 Some(path) => Self::load_from_file(path)?,
160 None => Self::get_default_no_cache()?
161 })
162 }
163
164 /// Applies each [`Action`] in [`Self::actions`] in order to the provided [`TaskState`].
165 ///
166 /// If an error is returned, `task_state` may be left in a partially modified state.
167 /// # Errors
168 #[doc = edoc!(applyerr(Action, 3))]
169 pub fn apply(&self, task_state: &mut TaskState) -> Result<(), ApplyCleanerError> {
170 for action in &*self.actions {
171 action.apply(task_state)?;
172 }
173 Ok(())
174 }
175
176 /// Runs the provided [`Tests`], panicking if any of them fail.
177 /// # Panics
178 /// If any [`Test`] fails, panics.
179 pub fn run_tests(&self, tests: Tests) {
180 tests.r#do(self);
181 }
182
183 /// Asserts the suitability of `self` to be URL Cleaner's default config.
184 ///
185 /// Exact behavior is unspecified and changes are not considered breaking.
186 /// # Panics
187 /// If `self` is deemed unsuitable to be URL Cleaner's default config, panics.
188 #[cfg_attr(feature = "default-cleaner", doc = "# Examples")]
189 #[cfg_attr(feature = "default-cleaner", doc = "```")]
190 #[cfg_attr(feature = "default-cleaner", doc = "use url_cleaner_engine::types::*;")]
191 #[cfg_attr(feature = "default-cleaner", doc = "")]
192 #[cfg_attr(feature = "default-cleaner", doc = "Cleaner::get_default().unwrap().assert_suitability();")]
193 #[cfg_attr(feature = "default-cleaner", doc = "```")]
194 pub fn assert_suitability(&self) {
195 Suitability::assert_suitability(self, self)
196 }
197}
198
199/// The enum of errors [`Cleaner::apply`] can return.
200#[derive(Debug, Error)]
201pub enum ApplyCleanerError {
202 /// Returned when a [`ActionError`] is encountered.
203 #[error(transparent)]
204 ActionError(#[from] ActionError)
205}
206
/// The JSON text of the default config.
///
/// Outside of tests this is the `default-cleaner.json.minified` file found in the build's `OUT_DIR`.
#[cfg(all(feature = "default-cleaner", not(test)))]
pub const DEFAULT_CLEANER_STR: &str = include_str!(concat!(env!("OUT_DIR"), "/default-cleaner.json.minified"));
/// The JSON text of the default config.
///
/// When testing this is the `default-cleaner.json` file from the source tree, included as-is.
#[cfg(all(feature = "default-cleaner", test))]
pub const DEFAULT_CLEANER_STR: &str = include_str!("../../default-cleaner.json");
/// The cached deserialization of [`DEFAULT_CLEANER_STR`], filled in by the first successful [`Cleaner::get_default`].
#[cfg(feature = "default-cleaner")]
static DEFAULT_CLEANER: OnceLock<Cleaner<'static>> = OnceLock::new();
216
217/// The enum of errors that can happen when loading a [`Cleaner`].
218#[derive(Debug, Error)]
219pub enum GetCleanerError {
220 /// Returned when loading a [`Cleaner`] fails.
221 #[error(transparent)]
222 CantLoadCleaner(#[from] io::Error),
223 /// Returned when deserializing a [`Cleaner`] fails.
224 #[error(transparent)]
225 CantParseCleaner(#[from] serde_json::Error),
226}