url_cleaner_engine/types/
cleaner.rs

1//! The configuration for how a URL should be cleaned.
2
3use std::fs::read_to_string;
4use std::path::Path;
5use std::borrow::Cow;
6use std::io;
7#[cfg(feature = "default-cleaner")]
8use std::sync::OnceLock;
9
10use serde::{Serialize, Deserialize};
11use thiserror::Error;
12
13use crate::types::*;
14use crate::glue::*;
15use crate::testing::*;
16use crate::util::*;
17
18pub mod params;
19pub use params::*;
20pub mod docs;
21pub use docs::*;
22pub mod common_call;
23pub use common_call::*;
24pub mod commons;
25pub use commons::*;
26
27/// The config that determines all behavior of how URLs are cleaned.
28#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize, Serialize, Suitability)]
29pub struct Cleaner {
30    /// The documentation.
31    ///
32    /// Defaults to an empty [`CleanerDocs`].
33    #[serde(default, skip_serializing_if = "is_default")]
34    pub docs: CleanerDocs,
35    /// The location of the cache.
36    ///
37    /// Defaults to being stored in memory and destroyed on program exit.
38    #[cfg(feature = "cache")]
39    #[serde(default, skip_serializing_if = "is_default")]
40    pub cache_path: CachePath,
41    /// Fine tuning shared between all [`Task`]s of a [`Job`] and maybe multiple [`Job`]s.
42    ///
43    /// Defaults to an empty [`Params`].
44    #[serde(default, skip_serializing_if = "is_default")]
45    pub params: Params,
46    /// Basically functions.
47    ///
48    /// Defaults to an empty [`Commons`].
49    #[serde(default, skip_serializing_if = "is_default")]
50    pub commons: Commons,
51    /// The [`Action`]s to apply.
52    ///
53    /// Defaults to an empty [`Vec`].
54    #[serde(default, skip_serializing_if = "is_default")]
55    pub actions: Vec<Action>
56}
57
58impl Cleaner {
59    /// Load [`Self`] from a JSON file.
60    /// # Errors
61    /// If the call to [`std::fs::read_to_string`] returns an error, that error is returned.
62    ///
63    /// If the call to [`serde_json::from_str`] returns an error, that error is returned.
64    pub fn load_from_file<T: AsRef<Path>>(path: T) -> Result<Self, GetCleanerError> {
65        serde_json::from_str(&read_to_string(path)?).map_err(Into::into)
66    }
67
68    /// Gets the default [`Self`] compiled into the binary itself.
69    ///
70    /// Caching is done by putting the [`Self`] in [`DEFAULT_CLEANER`] and only returning references to it.
71    ///
72    /// If you know you're only going to get the default config once, [`Self::get_default_no_cache`] is better because you can apply [`ParamsDiff`]s to it without [`Clone::clone`]ing.
73    /// # Errors
74    /// If the call to [`Self::get_default_no_cache`] returns an error, that error is returned.
75    /// # Examples
76    /// ```
77    /// use url_cleaner_engine::types::*;
78    ///
79    /// Cleaner::get_default().unwrap();
80    /// ```
81    #[allow(dead_code, reason = "Public API.")]
82    #[cfg(feature = "default-cleaner")]
83    pub fn get_default() -> Result<&'static Self, GetCleanerError> {
84        if let Some(config) = DEFAULT_CLEANER.get() {
85            Ok(config)
86        } else {
87            let config = Self::get_default_no_cache()?;
88            Ok(DEFAULT_CLEANER.get_or_init(|| config))
89        }
90    }
91
92    /// Deserializes [`DEFAULT_CLEANER_STR`] and returns it without caching in [`DEFAULT_CLEANER`]
93    ///
94    /// If you're getting the default config often and rarely using [`ParamsDiff`]s, [`Self::get_default`] may be better due to it only deserializing the config once.
95    /// # Errors
96    /// If the call to [`serde_json::from_str`] returns an error, that error is returned.
97    /// # Examples
98    /// ```
99    /// use url_cleaner_engine::types::*;
100    ///
101    /// Cleaner::get_default_no_cache().unwrap();
102    /// ```
103    #[cfg(feature = "default-cleaner")]
104    pub fn get_default_no_cache() -> Result<Self, GetCleanerError> {
105        serde_json::from_str(DEFAULT_CLEANER_STR).map_err(Into::into)
106    }
107
108    /// If `path` is [`Some`], returns the result of [`Self::load_from_file`] in a [`Cow::Owned`].
109    ///
110    /// If `path` is [`None`], returns the result of [`Self::get_default`] in a [`Cow::Borrowed`].
111    /// # Errors
112    /// If the call to [`Self::load_from_file`] returns an error, that error is returned.
113    ///
114    /// If the call to [`Self::get_default`] returns an error, that error is returned.
115    /// # Examples
116    /// ```
117    /// use url_cleaner_engine::types::*;
118    ///
119    /// assert_eq!(
120    ///     Cleaner::get_default().unwrap(),
121    ///     &*Cleaner::load_or_get_default(None::<&str>).unwrap()
122    /// );
123    ///
124    /// assert_eq!(
125    ///     Cleaner::get_default().unwrap(),
126    ///     &*Cleaner::load_or_get_default(Some("default-cleaner.json")).unwrap()
127    /// );
128    /// ```
129    #[allow(dead_code, reason = "Public API.")]
130    #[cfg(feature = "default-cleaner")]
131    pub fn load_or_get_default<T: AsRef<Path>>(path: Option<T>) -> Result<Cow<'static, Self>, GetCleanerError> {
132        Ok(match path {
133            Some(path) => Cow::Owned(Self::load_from_file(path)?),
134            None => Cow::Borrowed(Self::get_default()?)
135        })
136    }
137
138    /// If `path` is [`Some`], returns the result of [`Self::load_from_file`].
139    ///
140    /// If `path` is [`None`], returns the result of [`Self::get_default_no_cache`].
141    /// # Errors
142    /// If the call to [`Self::load_from_file`] returns an error, that error is returned.
143    ///
144    /// If the call to [`Self::get_default`] returns an error, that error is returned.
145    /// # Examples
146    /// ```
147    /// use url_cleaner_engine::types::*;
148    ///
149    /// assert_eq!(
150    ///     Cleaner::get_default_no_cache().unwrap(),
151    ///     Cleaner::load_or_get_default_no_cache(None::<&str>).unwrap()
152    /// );
153    ///
154    /// assert_eq!(
155    ///     Cleaner::get_default_no_cache().unwrap(),
156    ///     Cleaner::load_or_get_default_no_cache(Some("default-cleaner.json")).unwrap()
157    /// );
158    /// ```
159    #[cfg(feature = "default-cleaner")]
160    pub fn load_or_get_default_no_cache<T: AsRef<Path>>(path: Option<T>) -> Result<Self, GetCleanerError> {
161        Ok(match path {
162            Some(path) => Self::load_from_file(path)?,
163            None => Self::get_default_no_cache()?
164        })
165    }
166
167    /// Applies each [`Action`] in [`Self::actions`] in order to the provided [`TaskState`].
168    ///
169    /// If an error is returned, `job_state` may be left in a partially modified state.
170    /// # Errors
171    /// If any call to [`Action::apply`] returns an error, that error is returned.
172    pub fn apply(&self, job_state: &mut TaskState) -> Result<(), ApplyCleanerError> {
173        for action in &self.actions {
174            action.apply(job_state)?;
175        }
176        Ok(())
177    }
178
179    /// Runs the provided [`Tests`], panicking if any of them fail.
180    /// # Panics
181    /// If any [`Test`] fails, panics.
182    pub fn run_tests(&self, tests: Tests) {
183        tests.r#do(self);
184    }
185
186    /// Asserts the suitability of `self` to be URL Cleaner's default config.
187    ///
188    /// Exact behavior is unspecified and changes are not considered breaking.
189    /// # Panics
190    /// If `self` is deemed unsuitable to be URL Cleaner's default config, panics.
191    #[cfg_attr(feature = "default-cleaner", doc = "# Examples")]
192    #[cfg_attr(feature = "default-cleaner", doc = "```")]
193    #[cfg_attr(feature = "default-cleaner", doc = "use url_cleaner_engine::types::*;")]
194    #[cfg_attr(feature = "default-cleaner", doc = "")]
195    #[cfg_attr(feature = "default-cleaner", doc = "Cleaner::get_default().unwrap().assert_suitability();")]
196    #[cfg_attr(feature = "default-cleaner", doc = "```")]
197    pub fn assert_suitability(&self) {
198        Suitability::assert_suitability(self, self)
199    }
200}
201
202/// The enum of errors [`Cleaner::apply`] can return.
203#[derive(Debug, Error)]
204pub enum ApplyCleanerError {
205    /// Returned when a [`ActionError`] is encountered.
206    #[error(transparent)]
207    ActionError(#[from] ActionError)
208}
209
/// The JSON text of the default config.
///
/// Outside of test builds this is the `default-cleaner.json.minified` file found in `OUT_DIR`.
#[cfg(all(feature = "default-cleaner", not(test)))]
pub const DEFAULT_CLEANER_STR: &str = include_str!(concat!(env!("OUT_DIR"), "/default-cleaner.json.minified"));
/// The JSON text of the default config.
///
/// In test builds this is `default-cleaner.json` included by relative path instead of the minified copy in `OUT_DIR`.
#[cfg(all(feature = "default-cleaner", test))]
pub const DEFAULT_CLEANER_STR: &str = include_str!("../../default-cleaner.json");
/// The cached deserialization of the default config.
///
/// Starts out empty; [`Cleaner::get_default`] initializes it if it is still empty.
#[cfg(feature = "default-cleaner")]
#[allow(dead_code, reason = "Public API.")]
pub static DEFAULT_CLEANER: OnceLock<Cleaner> = OnceLock::new();
220
221/// The enum of errors that can happen when loading a [`Cleaner`].
222#[derive(Debug, Error)]
223pub enum GetCleanerError {
224    /// Returned when loading a [`Cleaner`] fails.
225    #[error(transparent)]
226    CantLoadCleaner(#[from] io::Error),
227    /// Returned when deserializing a [`Cleaner`] fails.
228    #[error(transparent)]
229    CantParseCleaner(#[from] serde_json::Error),
230}