url_cleaner_engine/types/
cleaner.rs

1//! The configuration for how a URL should be cleaned.
2
3use std::fs::read_to_string;
4use std::path::Path;
5use std::borrow::Cow;
6use std::io;
7#[cfg(feature = "default-cleaner")]
8use std::sync::OnceLock;
9
10use serde::{Serialize, Deserialize};
11use thiserror::Error;
12
13use crate::types::*;
14use crate::testing::*;
15use crate::util::*;
16
17pub mod params;
18pub use params::*;
19pub mod docs;
20pub use docs::*;
21pub mod common_call;
22pub use common_call::*;
23pub mod commons;
24pub use commons::*;
25
26/// The config that determines all behavior of how URLs are cleaned.
27#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize, Serialize, Suitability)]
28#[serde(deny_unknown_fields)]
29pub struct Cleaner<'a> {
30    /// The documentation.
31    ///
32    /// Defaults to an empty [`CleanerDocs`].
33    #[serde(default, skip_serializing_if = "is_default")]
34    pub docs: Cow<'a, CleanerDocs>,
35    /// Tuning shared between all [`Task`]s spawned with this [`Cleaner`].
36    ///
37    /// Defaults to an empty [`Params`].
38    #[serde(default, skip_serializing_if = "is_default")]
39    pub params: Params<'a>,
40    /// Basically functions.
41    ///
42    /// Defaults to an empty [`Commons`].
43    #[serde(default, skip_serializing_if = "is_default")]
44    pub commons: Cow<'a, Commons>,
45    /// The [`Action`]s to apply.
46    ///
47    /// Defaults to an empty [`Vec`].
48    #[serde(default, skip_serializing_if = "is_default")]
49    pub actions: Cow<'a, [Action]>
50}
51
52impl<'a> Cleaner<'a> {
53    /// Create a new [`Self`] that [`Cow::Borrowed`]s all fields.
54    ///
55    /// Basically a very cheap [`Clone`] that you can apply [`ParamsDiff`]s to.
56    pub fn borrowed(&'a self) -> Self {
57        Self {
58            docs   : Cow::Borrowed(&*self.docs),
59            params : self.params.borrowed(),
60            commons: Cow::Borrowed(&*self.commons),
61            actions: Cow::Borrowed(&*self.actions)
62        }
63    }
64
65    /// Load [`Self`] from a JSON file.
66    /// # Errors
67    #[doc = edoc!(callerr(std::fs::read_to_string), callerr(serde_json::from_str))]
68    pub fn load_from_file<T: AsRef<Path>>(path: T) -> Result<Cleaner<'static>, GetCleanerError> {
69        serde_json::from_str(&read_to_string(path)?).map_err(Into::into)
70    }
71
72    /// Gets the default [`Self`] compiled into the binary itself.
73    ///
74    /// If you know you're only going to get the default config once, [`Self::get_default_no_cache`] is better because you can apply [`ParamsDiff`]s to it without [`Clone::clone`]ing.
75    /// # Errors
76    #[doc = edoc!(callerr(Self::get_default_no_cache))]
77    /// If the call to [`Self::get_default_no_cache`] returns an error, that error is returned.
78    /// # Examples
79    /// ```
80    /// use url_cleaner_engine::types::*;
81    ///
82    /// Cleaner::get_default().unwrap();
83    /// ```
84    #[cfg(feature = "default-cleaner")]
85    pub fn get_default() -> Result<&'static Cleaner<'static>, GetCleanerError> {
86        if let Some(config) = DEFAULT_CLEANER.get() {
87            Ok(config)
88        } else {
89            let config = Self::get_default_no_cache()?;
90            Ok(DEFAULT_CLEANER.get_or_init(|| config))
91        }
92    }
93
94    /// Deserializes [`DEFAULT_CLEANER_STR`] and returns it without caching.
95    ///
96    /// If you're getting the default config often and rarely using [`ParamsDiff`]s, [`Self::get_default`] may be better due to it only deserializing the config once.
97    /// # Errors
98    #[doc = edoc!(callerr(serde_json::from_str))]
99    /// # Examples
100    /// ```
101    /// use url_cleaner_engine::types::*;
102    ///
103    /// Cleaner::get_default_no_cache().unwrap();
104    /// ```
105    #[cfg(feature = "default-cleaner")]
106    pub fn get_default_no_cache() -> Result<Cleaner<'static>, GetCleanerError> {
107        serde_json::from_str(DEFAULT_CLEANER_STR).map_err(Into::into)
108    }
109
110    /// If `path` is [`Some`], returns the result of [`Self::load_from_file`] in a [`Cow::Owned`].
111    ///
112    /// If `path` is [`None`], returns the result of [`Self::get_default`] in a [`Cow::Borrowed`].
113    /// # Errors
114    #[doc = edoc!(callerr(Self::load_from_file), callerr(Self::get_default))]
115    /// # Examples
116    /// ```
117    /// use url_cleaner_engine::types::*;
118    ///
119    /// assert_eq!(
120    ///     Cleaner::get_default().unwrap(),
121    ///     &*Cleaner::load_or_get_default(None::<&str>).unwrap()
122    /// );
123    ///
124    /// assert_eq!(
125    ///     Cleaner::get_default().unwrap(),
126    ///     &*Cleaner::load_or_get_default(Some("default-cleaner.json")).unwrap()
127    /// );
128    /// ```
129    #[cfg(feature = "default-cleaner")]
130    pub fn load_or_get_default<T: AsRef<Path>>(path: Option<T>) -> Result<Cow<'static, Self>, GetCleanerError> {
131        Ok(match path {
132            Some(path) => Cow::Owned(Self::load_from_file(path)?),
133            None => Cow::Borrowed(Self::get_default()?)
134        })
135    }
136
137    /// If `path` is [`Some`], returns the result of [`Self::load_from_file`].
138    ///
139    /// If `path` is [`None`], returns the result of [`Self::get_default_no_cache`].
140    /// # Errors
141    #[doc = edoc!(callerr(Self::load_from_file), callerr(Self::get_default_no_cache))]
142    /// # Examples
143    /// ```
144    /// use url_cleaner_engine::types::*;
145    ///
146    /// assert_eq!(
147    ///     Cleaner::get_default_no_cache().unwrap(),
148    ///     Cleaner::load_or_get_default_no_cache(None::<&str>).unwrap()
149    /// );
150    ///
151    /// assert_eq!(
152    ///     Cleaner::get_default_no_cache().unwrap(),
153    ///     Cleaner::load_or_get_default_no_cache(Some("default-cleaner.json")).unwrap()
154    /// );
155    /// ```
156    #[cfg(feature = "default-cleaner")]
157    pub fn load_or_get_default_no_cache<T: AsRef<Path>>(path: Option<T>) -> Result<Self, GetCleanerError> {
158        Ok(match path {
159            Some(path) => Self::load_from_file(path)?,
160            None => Self::get_default_no_cache()?
161        })
162    }
163
164    /// Applies each [`Action`] in [`Self::actions`] in order to the provided [`TaskState`].
165    ///
166    /// If an error is returned, `task_state` may be left in a partially modified state.
167    /// # Errors
168    #[doc = edoc!(applyerr(Action, 3))]
169    pub fn apply(&self, task_state: &mut TaskState) -> Result<(), ApplyCleanerError> {
170        for action in &*self.actions {
171            action.apply(task_state)?;
172        }
173        Ok(())
174    }
175
176    /// Runs the provided [`Tests`], panicking if any of them fail.
177    /// # Panics
178    /// If any [`Test`] fails, panics.
179    pub fn run_tests(&self, tests: Tests) {
180        tests.r#do(self);
181    }
182
183    /// Asserts the suitability of `self` to be URL Cleaner's default config.
184    ///
185    /// Exact behavior is unspecified and changes are not considered breaking.
186    /// # Panics
187    /// If `self` is deemed unsuitable to be URL Cleaner's default config, panics.
188    #[cfg_attr(feature = "default-cleaner", doc = "# Examples")]
189    #[cfg_attr(feature = "default-cleaner", doc = "```")]
190    #[cfg_attr(feature = "default-cleaner", doc = "use url_cleaner_engine::types::*;")]
191    #[cfg_attr(feature = "default-cleaner", doc = "")]
192    #[cfg_attr(feature = "default-cleaner", doc = "Cleaner::get_default().unwrap().assert_suitability();")]
193    #[cfg_attr(feature = "default-cleaner", doc = "```")]
194    pub fn assert_suitability(&self) {
195        Suitability::assert_suitability(self, self)
196    }
197}
198
199/// The enum of errors [`Cleaner::apply`] can return.
200#[derive(Debug, Error)]
201pub enum ApplyCleanerError {
202    /// Returned when a [`ActionError`] is encountered.
203    #[error(transparent)]
204    ActionError(#[from] ActionError)
205}
206
/// The JSON text of the default config.
///
/// Test builds embed `default-cleaner.json` directly via a path relative to this file.
#[cfg(all(feature = "default-cleaner", test))]
pub const DEFAULT_CLEANER_STR: &str = include_str!("../../default-cleaner.json");
/// The JSON text of the default config.
///
/// Non-test builds embed the `default-cleaner.json.minified` file found in `OUT_DIR`.
#[cfg(all(feature = "default-cleaner", not(test)))]
pub const DEFAULT_CLEANER_STR: &str = include_str!(concat!(env!("OUT_DIR"), "/default-cleaner.json.minified"));
/// The cached deserialization of the default config.
///
/// Starts empty and is filled by [`Cleaner::get_default`].
#[cfg(feature = "default-cleaner")]
static DEFAULT_CLEANER: OnceLock<Cleaner<'static>> = OnceLock::new();
216
217/// The enum of errors that can happen when loading a [`Cleaner`].
218#[derive(Debug, Error)]
219pub enum GetCleanerError {
220    /// Returned when loading a [`Cleaner`] fails.
221    #[error(transparent)]
222    CantLoadCleaner(#[from] io::Error),
223    /// Returned when deserializing a [`Cleaner`] fails.
224    #[error(transparent)]
225    CantParseCleaner(#[from] serde_json::Error),
226}