url_cleaner_engine/types/actions.rs
1//! Logic for how a [`TaskState`] should be modified.
2
3use std::str::Utf8Error;
4use std::collections::HashSet;
5use std::borrow::Cow;
6
7use serde::{Serialize, Deserialize};
8use serde_with::{serde_as, SetPreventDuplicates};
9use thiserror::Error;
10#[cfg(feature = "http")]
11use reqwest::header::HeaderMap;
12#[expect(unused_imports, reason = "Used in doc comment.")]
13use url::Url;
14
15use crate::glue::*;
16use crate::types::*;
17use crate::util::*;
18
19/// Actions are how [`TaskState`]s get manipulated to clean URLs.
20///
21/// Please note that, in general, when an [`Action`] returns an [`Err`], the [`TaskState`] may still be modified.
22///
23/// For example, if an [`Action::All`] contains 3 [`Action`]s and the second one returns an error, the effects of the first [`Action`] are still applied.
24///
25/// In practice this should rarely be an issue, but when it is, use [`Action::RevertOnError`].
26#[serde_as]
27#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Suitability)]
28pub enum Action {
29 /// Does nothing.
30 /// # Examples
31 /// ```
32 /// use url_cleaner_engine::types::*;
33 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com");
34 ///
35 /// Action::None.apply(&mut task_state).unwrap();
36 ///
37 /// assert_eq!(task_state.url, "https://example.com/");
38 /// ```
39 None,
40 /// Always returns the error [`ActionError::ExplicitError`] with the included message.
41 /// # Errors
42 /// Always returns the error [`ActionError::ExplicitError`].
43 /// # Examples
44 /// ```
45 /// use url_cleaner_engine::types::*;
46 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com");
47 ///
48 /// Action::Error("...".into()).apply(&mut task_state).unwrap_err();
49 ///
50 /// assert_eq!(task_state.url, "https://example.com/");
51 /// ```
52 Error(String),
53 /// Applies the contained [`Self`], prints it, the old URL, the old scratchpad, its return value, and the new [`TaskState`] to stderr, then returns that return value.
54 /// # Errors
55 /// If the call to [`Self::apply`] returns an error, that error is returned after the debug info is printed.
56 #[suitable(never)]
57 Debug(Box<Self>),
58
59 /// If the call to [`Self::If::if`] passes, apply [`Self::If::then`].
60 ///
61 /// If the call to [`Self::If::if`] fails and [`Self::If::else`] is [`Some`], apply [`Self::If::else`].
62 /// # Errors
63 /// If the call to [`Condition::satisfied_by`] returns an error, that error is returned.
64 ///
65 /// If the call to [`Self::apply`] returns an error, that error is returned.
66 /// # Examples
67 /// ```
68 /// use url_cleaner_engine::types::*;
69 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com");
70 ///
71 /// Action::If {
72 /// r#if : Condition::Always,
73 /// then : Box::new(Action::None),
74 /// r#else: Some(Box::new(Action::Error("...".into())))
75 /// }.apply(&mut task_state).unwrap();
76 ///
77 /// Action::If {
78 /// r#if : Condition::Never,
79 /// then : Box::new(Action::None),
80 /// r#else: Some(Box::new(Action::Error("...".into())))
81 /// }.apply(&mut task_state).unwrap_err();
82 ///
83 /// Action::If {
84 /// r#if : Condition::Always,
85 /// then : Box::new(Action::None),
86 /// r#else: None
87 /// }.apply(&mut task_state).unwrap();
88 ///
89 /// Action::If {
90 /// r#if : Condition::Never,
91 /// then : Box::new(Action::None),
92 /// r#else: None
93 /// }.apply(&mut task_state).unwrap();
94 /// ```
95 If {
96 /// The [`Condition`] to decide between [`Self::If::then`] and [`Self::If::else`].
97 r#if: Condition,
98 /// The [`Self`] to apply if [`Self::If::if`] passes.
99 then: Box<Self>,
100 /// The [`Self`] to apply if [`Self::If::if`] fails.
101 ///
102 /// Defaults to [`None`].
103 #[serde(default, skip_serializing_if = "is_default")]
104 r#else: Option<Box<Self>>
105 },
106 /// Applies the contained [`Self`]s in order.
107 ///
108 /// Please note that if one of the contained [`Self`]s returns an error, previous calls to [`Self::apply`] aren't reverted.
109 /// # Errors
110 /// If any call to [`Self::apply`] returns an error, that error is returned.
111 /// # Examples
112 /// ```
113 /// use url_cleaner_engine::types::*;
114 /// url_cleaner_engine::task_state!(task_state);
115 ///
116 /// Action::All(vec![
117 /// Action::SetHost(Some("example2.com".to_string())),
118 /// Action::Error("...".into()),
119 /// Action::SetHost(Some("example3.com".to_string())),
120 /// ]).apply(&mut task_state).unwrap_err();
121 ///
122 /// assert_eq!(task_state.url, "https://example2.com/");
123 /// ```
124 All(Vec<Self>),
125 /// Gets the value specified by [`Self::PartMap::part`], indexes [`Self::PartMap::map`], and applies the returned [`Self`].
126 ///
127 /// If the call to [`Map::get`] returns [`None`], does nothing.
128 /// # Errors
129 /// If the call to [`Self::apply`] returns an error, that error is returned.
130 /// # Examples
131 /// ```
132 /// use url_cleaner_engine::types::*;
133 /// url_cleaner_engine::task_state!(task_state);
134 ///
135 /// Action::PartMap {
136 /// part: UrlPart::Host,
137 /// map: Map {
138 /// map: [
139 /// ("example.com".into(), Action::Error("...".into()))
140 /// ].into(),
141 /// if_null: None,
142 /// r#else: None
143 /// }
144 /// }.apply(&mut task_state).unwrap_err();
145 /// ```
146 PartMap {
147 /// The [`UrlPart`] to index [`Self::PartMap::map`] with.
148 part: UrlPart,
149 /// The [`Map`] to index with [`Self::PartMap::part`].
150 #[serde(flatten)]
151 map: Map<Self>
152 },
153 /// Gets the string specified by [`Self::StringMap::value`], indexes [`Self::StringMap::map`], and applies the returned [`Self`].
154 ///
155 /// If the call to [`Map::get`] returns [`None`], does nothing.
156 /// # Errors
157 /// If the call to [`StringSource::get`] returns an error, that error is returned.
158 ///
159 /// If the call to [`Self::apply`] returns an error, that error is returned.
160 /// # Examples
161 /// ```
162 /// use url_cleaner_engine::types::*;
163 /// url_cleaner_engine::task_state!(task_state);
164 ///
165 /// Action::StringMap {
166 /// value: StringSource::String("a".into()),
167 /// map: Map {
168 /// map: [
169 /// ("a".into(), Action::Error("...".into()))
170 /// ].into(),
171 /// if_null: None,
172 /// r#else: None
173 /// }
174 /// }.apply(&mut task_state).unwrap_err();
175 /// ```
176 StringMap {
177 /// The [`StringSource`] to index [`Self::StringMap::map`] with.
178 value: StringSource,
179 /// The [`Map`] to index with [`Self::StringMap::value`].
180 #[serde(flatten)]
181 map: Map<Self>
182 },
183
184 /// Repeats [`Self::Repeat::actions`] until either a pass leaves both the [`TaskState::url`] and the [`TaskState::scratchpad`] unchanged or the actions were applied [`Self::Repeat::limit`] times.
185 /// # Errors
186 /// If any call to [`Action::apply`] returns an error, that error is returned.
187 Repeat {
188 /// The [`Self`]s to repeat.
189 actions: Vec<Action>,
190 /// The maximum amount of times to repeat.
191 ///
192 /// Defaults to 10.
193 #[serde(default = "get_10_u64")]
194 limit: u64
195 },
196
197 /// If the contained [`Self`] returns an error, ignore it.
198 ///
199 /// Does not revert any successful calls to [`Self::apply`]. For that, also use [`Self::RevertOnError`].
200 /// # Examples
201 /// ```
202 /// use url_cleaner_engine::types::*;
203 /// url_cleaner_engine::task_state!(task_state);
204 ///
205 /// Action::IgnoreError(Box::new(Action::Error("...".into()))).apply(&mut task_state).unwrap();
206 /// ```
207 IgnoreError(Box<Self>),
208 /// If the contained [`Self`] returns an error, revert the [`TaskState`] to its previous state.
209 /// # Errors
210 /// If the call to [`Self::apply`] returns an error, that error is returned.
211 RevertOnError(Box<Self>),
212 /// If [`Self::TryElse::try`]'s call to [`Self::apply`] returns an error, apply [`Self::TryElse::else`].
213 /// # Errors
214 /// If both calls to [`Self::apply`] return errors, both errors are returned.
215 TryElse {
216 /// The [`Self`] to try first.
217 r#try: Box<Self>,
218 /// The [`Self`] to try if [`Self::TryElse::try`] returns an error.
219 r#else: Box<Self>
220 },
221 /// Applies the contained [`Self`]s in order, stopping as soon as a call to [`Self::apply`] doesn't return an error.
222 /// # Errors
223 /// If all calls to [`Self::apply`] return errors, the last error is returned. In the future this should be changed to return all errors. NOTE(review): [`ActionError::FirstNotErrorErrors`] holds a [`Vec`] of errors, so all of them may already be returned; confirm against [`Self::apply`].
224 FirstNotError(Vec<Self>),
225
226 /// Remove the entire [`UrlPart::Query`].
227 /// # Examples
228 /// ```
229 /// use url_cleaner_engine::types::*;
230 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com?a=2");
231 ///
232 /// Action::RemoveQuery.apply(&mut task_state).unwrap();
233 /// assert_eq!(task_state.url, "https://example.com/");
234 /// ```
235 RemoveQuery,
236 /// Removes all query parameters with the specified name.
237 ///
238 /// For performance reasons, if the resulting query is empty, this instead sets it to [`None`].
239 /// # Errors
240 /// If the call to [`StringSource::get`] returns an error, that error is returned.
241 /// # Examples
242 /// ```
243 /// use url_cleaner_engine::types::*;
244 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com?a=2&b=3&a=4&c=5");
245 ///
246 /// Action::RemoveQueryParam("a".into()).apply(&mut task_state).unwrap();
247 /// assert_eq!(task_state.url.query(), Some("b=3&c=5"));
248 /// Action::RemoveQueryParam("b".into()).apply(&mut task_state).unwrap();
249 /// assert_eq!(task_state.url.query(), Some("c=5"));
250 /// Action::RemoveQueryParam("c".into()).apply(&mut task_state).unwrap();
251 /// assert_eq!(task_state.url.query(), None);
252 /// ```
253 RemoveQueryParam(StringSource),
254 /// Removes all query params with names in the specified [`HashSet`].
255 ///
256 /// For performance reasons, if the resulting query is empty, this instead sets it to [`None`].
257 /// # Examples
258 /// ```
259 /// use url_cleaner_engine::types::*;
260 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com?a=2&b=3&a=4&c=5");
261 ///
262 /// Action::RemoveQueryParams(["a".to_string(), "b".to_string()].into()).apply(&mut task_state).unwrap();
263 /// assert_eq!(task_state.url.query(), Some("c=5"));
264 /// Action::RemoveQueryParams(["c".to_string()].into()).apply(&mut task_state).unwrap();
265 /// assert_eq!(task_state.url.query(), None);
266 /// ```
267 RemoveQueryParams(#[serde_as(as = "SetPreventDuplicates<_>")] HashSet<String>),
268 /// Keeps only query params with names in the specified [`HashSet`].
269 ///
270 /// For performance reasons, if the resulting query is empty, this instead sets it to [`None`].
271 /// # Examples
272 /// ```
273 /// use url_cleaner_engine::types::*;
274 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com?a=2&b=3&a=4&c=5");
275 ///
276 /// Action::AllowQueryParams(["a".to_string(), "b".to_string()].into()).apply(&mut task_state).unwrap();
277 /// assert_eq!(task_state.url.query(), Some("a=2&b=3&a=4"));
278 /// Action::AllowQueryParams(["c".to_string()].into()).apply(&mut task_state).unwrap();
279 /// assert_eq!(task_state.url.query(), None);
280 /// ```
281 AllowQueryParams(#[serde_as(as = "SetPreventDuplicates<_>")] HashSet<String>),
282 /// Removes all query params with names matching the specified [`StringMatcher`].
283 ///
284 /// For performance reasons, if the resulting query is empty, this instead sets it to [`None`].
285 /// # Errors
286 /// If the call to [`StringMatcher::satisfied_by`] returns an error, that error is returned.
287 /// # Examples
288 /// ```
289 /// use url_cleaner_engine::types::*;
290 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com?a=2&b=3&a=4&c=5");
291 ///
292 /// Action::RemoveQueryParamsMatching(StringMatcher::Is("a".into())).apply(&mut task_state).unwrap();
293 /// assert_eq!(task_state.url.query(), Some("b=3&c=5"));
294 /// Action::RemoveQueryParamsMatching(StringMatcher::Is("b".into())).apply(&mut task_state).unwrap();
295 /// assert_eq!(task_state.url.query(), Some("c=5"));
296 /// Action::RemoveQueryParamsMatching(StringMatcher::Is("c".into())).apply(&mut task_state).unwrap();
297 /// assert_eq!(task_state.url.query(), None);
298 /// ```
299 RemoveQueryParamsMatching(StringMatcher),
300 /// Keeps only query params with names matching the specified [`StringMatcher`].
301 ///
302 /// For performance reasons, if the resulting query is empty, this instead sets it to [`None`].
303 /// # Errors
304 /// If the call to [`StringMatcher::satisfied_by`] returns an error, that error is returned.
305 /// # Examples
306 /// ```
307 /// use url_cleaner_engine::types::*;
308 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com?a=2&b=3&a=4&c=5");
309 ///
310 /// Action::AllowQueryParamsMatching(StringMatcher::Is("a".into())).apply(&mut task_state).unwrap();
311 /// assert_eq!(task_state.url.query(), Some("a=2&a=4"));
312 /// Action::AllowQueryParamsMatching(StringMatcher::Is("b".into())).apply(&mut task_state).unwrap();
313 /// assert_eq!(task_state.url.query(), None);
314 /// ```
315 AllowQueryParamsMatching(StringMatcher),
316 /// Sets [`UrlPart::Whole`] to the value of the first query parameter with a name determined by the [`TaskState`].
317 /// # Errors
318 /// If the call to [`StringSource::get`] returns an error, that error is returned.
319 ///
320 /// If the call to [`StringSource::get`] returns [`None`], returns the error [`ActionError::StringSourceIsNone`].
321 ///
322 /// If no matching query parameter is found, returns the error [`ActionError::QueryParamNotFound`].
323 /// # Examples
324 /// ```
325 /// use url_cleaner_engine::types::*;
326 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com?redirect=https://example.com/2");
327 ///
328 /// Action::GetUrlFromQueryParam("redirect".into()).apply(&mut task_state).unwrap();
329 /// assert_eq!(task_state.url, "https://example.com/2");
330 ///
331 /// Action::GetUrlFromQueryParam("redirect".into()).apply(&mut task_state).unwrap_err();
332 /// ```
333 GetUrlFromQueryParam(StringSource),
334
335
336
337 /// Sets the [`UrlPart::Host`] to the specified value.
338 /// # Errors
339 /// If the call to [`BetterUrl::set_host`] returns an error, that error is returned.
340 /// # Examples
341 /// ```
342 /// use url_cleaner_engine::types::*;
343 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com");
344 ///
345 /// Action::SetHost(Some("example2.com".into())).apply(&mut task_state).unwrap();
346 /// assert_eq!(task_state.url, "https://example2.com/")
347 /// ```
348 SetHost(Option<String>),
349 /// "Join"s a URL like how relative links on websites work.
350 ///
351 /// See [`Url::join`] for details.
352 /// # Errors
353 /// If the call to [`StringSource::get`] returns an error, that error is returned.
354 ///
355 /// If the call to [`Url::join`] returns an error, that error is returned.
356 /// # Examples
357 /// ```
358 /// use url_cleaner_engine::types::*;
359 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com/a/b/c");
360 ///
361 /// Action::Join("..".into()).apply(&mut task_state).unwrap();
362 /// assert_eq!(task_state.url, "https://example.com/a/");
363 ///
364 ///
365 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com/a/b/c/");
366 ///
367 /// Action::Join("..".into()).apply(&mut task_state).unwrap();
368 /// assert_eq!(task_state.url, "https://example.com/a/b/");
369 /// ```
370 Join(StringSource),
371
372
373
374 /// Sets the specified [`UrlPart`] to the specified value.
375 /// # Errors
376 /// If the call to [`StringSource::get`] returns an error, that error is returned.
377 ///
378 /// If the call to [`UrlPart::set`] returns an error, that error is returned.
379 /// # Examples
380 /// ```
381 /// use url_cleaner_engine::types::*;
382 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com");
383 ///
384 /// Action::SetPart {part: UrlPart::Path, value: "abc".into()}.apply(&mut task_state).unwrap();
385 /// assert_eq!(task_state.url, "https://example.com/abc");
386 /// ```
387 SetPart {
388 /// The part to set the value of.
389 part: UrlPart,
390 /// The value to set the part to.
391 value: StringSource
392 },
393 /// Applies [`Self::ModifyPart::modification`] to the specified [`UrlPart`].
394 ///
395 /// NOTE(review): the example below turns a [`None`] query into `abc`, so unlike [`Self::ModifyPartIfSome`] this does not appear to skip [`None`] parts; confirm against [`Self::apply`].
396 /// # Errors
397 /// If the call to [`StringModification::apply`] returns an error, that error is returned.
398 ///
399 /// If the call to [`UrlPart::set`] returns an error, that error is returned.
400 /// # Examples
401 /// ```
402 /// use url_cleaner_engine::types::*;
403 ///
404 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com");
405 ///
406 /// Action::ModifyPart {part: UrlPart::Path, modification: StringModification::Set("abc".into())}.apply(&mut task_state).unwrap();
407 /// assert_eq!(task_state.url, "https://example.com/abc");
408 ///
409 /// Action::ModifyPart {part: UrlPart::Query, modification: StringModification::Set("abc".into())}.apply(&mut task_state).unwrap();
410 /// assert_eq!(task_state.url, "https://example.com/abc?abc");
411 /// ```
412 ModifyPart {
413 /// The part to modify.
414 part: UrlPart,
415 /// The modification to apply to the part.
416 modification: StringModification
417 },
418 /// If the specified [`UrlPart`] is [`Some`], apply [`Self::ModifyPartIfSome::modification`].
419 /// # Errors
420 /// If the call to [`StringModification::apply`] returns an error, that error is returned.
421 ///
422 /// If the call to [`UrlPart::set`] returns an error, that error is returned.
423 ModifyPartIfSome {
424 /// The [`UrlPart`] to modify.
425 part: UrlPart,
426 /// The [`StringModification`] to apply.
427 modification: StringModification
428 },
429 /// Sets [`Self::CopyPart::to`] to the value of [`Self::CopyPart::from`], leaving [`Self::CopyPart::from`] unchanged.
430 /// # Errors
431 /// If the call to [`UrlPart::set`] returns an error, that error is returned.
432 /// # Examples
433 /// ```
434 /// use url_cleaner_engine::types::*;
435 ///
436 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com/abc#def");
437 ///
438 /// Action::CopyPart {from: UrlPart::Fragment, to: UrlPart::Path}.apply(&mut task_state).unwrap();
439 /// assert_eq!(task_state.url, "https://example.com/def#def");
440 /// ```
441 CopyPart {
442 /// The part whose value to copy.
443 from: UrlPart,
444 /// The part whose value to set.
445 to: UrlPart
446 },
447 /// Sets [`Self::MovePart::to`] to the value of [`Self::MovePart::from`], then sets [`Self::MovePart::from`] to [`None`].
448 /// # Errors
449 /// If either call to [`UrlPart::set`] returns an error, that error is returned.
450 /// # Examples
451 /// ```
452 /// use url_cleaner_engine::types::*;
453 ///
454 /// url_cleaner_engine::task_state!(task_state, url = "https://example.com/abc#def");
455 ///
456 /// Action::MovePart {from: UrlPart::Fragment, to: UrlPart::Path}.apply(&mut task_state).unwrap();
457 /// assert_eq!(task_state.url, "https://example.com/def");
458 /// ```
459 MovePart {
460 /// The part whose value to move.
461 from: UrlPart,
462 /// The part whose value to set.
463 to: UrlPart
464 },
465
466 /// Sends an HTTP GET request to the current [`TaskState::url`], and sets it either to the value of the response's `Location` header (if the response is a redirect) or the final URL after redirects.
467 ///
468 /// If the `cache` feature flag is enabled, caches the operation with the category `redirect`, the key set to the input URL, and the value set to the returned URL.
469 /// # Errors
470 #[cfg_attr(feature = "cache", doc = "If the call to [`Cache::read`] returns an error, that error is returned.")]
471 #[cfg_attr(feature = "cache", doc = "")]
472 #[cfg_attr(feature = "cache", doc = "If the call to [`Cache::read`] returns [`None`], returns the error [`ActionError::CachedUrlIsNone`].")]
473 #[cfg_attr(feature = "cache", doc = "")]
474 #[cfg_attr(feature = "cache", doc = "If the call to [`BetterUrl::parse`] returns an error, that error is returned.")]
475 #[cfg_attr(feature = "cache", doc = "")]
476 /// If the call to [`TaskStateView::http_client`] returns an error, that error is returned.
477 ///
478 /// If the call to [`reqwest::blocking::RequestBuilder::send`] returns an error, that error is returned.
479 ///
480 /// If the response is a redirect and doesn't contain a `Location` header, returns the error [`ActionError::LocationHeaderNotFound`].
481 ///
482 /// If the `Location` header's call to [`std::str::from_utf8`] returns an error, that error is returned.
483 ///
484 /// If the `Location` header's call to [`BetterUrl::parse`] returns an error, that error is returned.
485 #[cfg_attr(feature = "cache", doc = "")]
486 #[cfg_attr(feature = "cache", doc = "If the call to [`Cache::write`] returns an error, that error is returned.")]
487 #[cfg(feature = "http")]
488 ExpandRedirect {
489 /// The extra headers to send.
490 ///
491 /// Defaults to an empty [`HeaderMap`].
492 #[serde(default, skip_serializing_if = "is_default", with = "serde_headermap")]
493 headers: HeaderMap,
494 /// The [`HttpClientConfigDiff`] to apply.
495 ///
496 /// Defaults to [`None`].
497 ///
498 /// Boxed because it's massive.
499 #[serde(default, skip_serializing_if = "is_default")]
500 http_client_config_diff: Option<Box<HttpClientConfigDiff>>
501 },
502 /// Sets the specified [`Scratchpad::flags`] to [`Self::SetScratchpadFlag::value`].
503 /// # Errors
504 /// If the call to [`StringSource::get`] returns an error, that error is returned.
505 ///
506 /// If the call to [`StringSource::get`] returns [`None`], returns the error [`ActionError::StringSourceIsNone`].
507 /// # Examples
508 /// ```
509 /// use url_cleaner_engine::types::*;
510 ///
511 /// url_cleaner_engine::task_state!(task_state);
512 ///
513 /// assert_eq!(task_state.scratchpad.flags.contains("abc"), false);
514 /// Action::SetScratchpadFlag {name: "abc".into(), value: true}.apply(&mut task_state).unwrap();
515 /// assert_eq!(task_state.scratchpad.flags.contains("abc"), true);
516 /// ```
517 SetScratchpadFlag {
518 /// The name of the flag to set.
519 name: StringSource,
520 /// The value to set the flag to.
521 value: bool
522 },
523 /// Sets the specified [`Scratchpad::vars`] to [`Self::SetScratchpadVar::value`].
524 /// # Errors
525 /// If either call to [`StringSource::get`] returns an error, that error is returned.
526 /// # Examples
527 /// ```
528 /// use url_cleaner_engine::types::*;
529 ///
530 /// url_cleaner_engine::task_state!(task_state);
531 ///
532 /// Action::SetScratchpadVar {name: "abc".into(), value: "def".into()}.apply(&mut task_state).unwrap();
533 /// assert_eq!(task_state.scratchpad.vars.get("abc").map(|x| &**x), Some("def"));
534 /// Action::SetScratchpadVar {name: "abc".into(), value: StringSource::None}.apply(&mut task_state).unwrap();
535 /// assert_eq!(task_state.scratchpad.vars.get("abc").map(|x| &**x), None);
536 /// ```
537 SetScratchpadVar {
538 /// The name of the var to set.
539 name: StringSource,
540 /// The value to set the var to.
541 value: StringSource
542 },
543 /// Applies [`Self::ModifyScratchpadVar::modification`] to the specified [`Scratchpad::vars`].
544 ///
545 /// NOTE(review): the example below turns an unset var into `123`, so this does not appear to skip unset vars; confirm against [`Self::apply`].
546 /// # Errors
547 /// If the call to [`StringSource::get`] returns an error, that error is returned.
548 ///
549 /// If the call to [`StringSource::get`] returns [`None`], returns the error [`ActionError::StringSourceIsNone`].
550 ///
551 /// If the call to [`StringModification::apply`] returns an error, that error is returned.
552 /// # Examples
553 /// ```
554 /// use url_cleaner_engine::types::*;
555 ///
556 /// url_cleaner_engine::task_state!(task_state);
557 ///
558 /// Action::ModifyScratchpadVar {name: "abc".into(), modification: StringModification::Set("123".into())}.apply(&mut task_state).unwrap();
559 /// assert_eq!(task_state.scratchpad.vars.get("abc").map(|x| &**x), Some("123"));
560 /// Action::ModifyScratchpadVar {name: "abc".into(), modification: StringModification::Set(StringSource::None)}.apply(&mut task_state).unwrap();
561 /// assert_eq!(task_state.scratchpad.vars.get("abc").map(|x| &**x), None);
562 /// ```
563 ModifyScratchpadVar {
564 /// The name of the var to modify.
565 name: StringSource,
566 /// The modification to apply.
567 modification: StringModification
568 },
569 /// If an entry with the specified category and a key of the current [`TaskState::url`] exists in the cache, sets the [`TaskState::url`] to the entry's value.
570 ///
571 /// If no matching entry exists, applies [`Self::CacheUrl::action`] and makes a new entry with the specified category, the previous [`TaskState::url`] as the key, and the new [`TaskState::url`] as the value.
572 ///
573 /// If an error is returned, no new cache entry is written.
574 /// # Errors
575 /// If the call to [`Cache::read`] returns an error, that error is returned.
576 ///
577 /// If the call to [`Cache::read`] returns [`None`], returns the error [`ActionError::CachedUrlIsNone`].
578 ///
579 /// If the call to [`BetterUrl::parse`] returns an error, that error is returned.
580 ///
581 /// If the call to [`Action::apply`] returns an error, that error is returned.
582 ///
583 /// If the call to [`Cache::write`] returns an error, that error is returned.
584 #[cfg(feature = "cache")]
585 CacheUrl {
586 /// The category for the cache entry.
587 category: StringSource,
588 /// The action to apply and cache.
589 action: Box<Self>
590 },
591 /// Applies a [`Self`] from [`TaskState::commons`]'s [`Commons::actions`].
592 /// # Errors
593 /// If the call to [`StringSource::get`] returns an error, that error is returned.
594 ///
595 /// If the call to [`StringSource::get`] returns [`None`], returns the error [`ActionError::StringSourceIsNone`].
596 ///
597 /// If the [`Commons::actions`] doesn't contain a [`Self`] with the specified name, returns the error [`ActionError::CommonActionNotFound`].
598 ///
599 /// If the call to [`CommonCallArgsSource::build`] returns an error, that error is returned.
600 ///
601 /// If the call to [`Self::apply`] returns an error, that error is returned.
602 /// # Examples
603 /// ```
604 /// use url_cleaner_engine::types::*;
605 ///
606 /// url_cleaner_engine::task_state!(task_state, commons = Commons {
607 /// actions: [("abc".into(), Action::None)].into(),
608 /// ..Default::default()
609 /// });
610 ///
611 /// Action::Common(CommonCall {name: Box::new("abc".into()), args: Default::default()}).apply(&mut task_state).unwrap();
612 /// ```
613 Common(CommonCall),
614 /// Calls the specified function and returns its value.
615 /// # Errors
616 /// If the call to the contained function returns an error, that error is returned.
617 /// # Examples
618 /// ```
619 /// use url_cleaner_engine::types::*;
620 ///
621 /// url_cleaner_engine::task_state!(task_state);
622 ///
623 /// fn some_complex_operation(task_state: &mut TaskState) -> Result<(), ActionError> {
624 /// Ok(())
625 /// }
626 ///
627 /// Action::Custom(some_complex_operation).apply(&mut task_state).unwrap();
628 /// ```
629 #[expect(clippy::type_complexity, reason = "Who cares")]
630 #[cfg(feature = "custom")]
631 #[suitable(never)]
632 #[serde(skip)]
633 Custom(fn(&mut TaskState) -> Result<(), ActionError>)
634}
635
636/// Helper function to get the default [`Action::Repeat::limit`].
637const fn get_10_u64() -> u64 {10}
638
639/// The enum of errors [`Action::apply`] can return.
640#[derive(Debug, Error)]
641pub enum ActionError {
642 /// Returned when an [`Action::Error`] is used.
643 #[error("Explicit error: {0}")]
644 ExplicitError(String),
645 /// Returned when both [`Action`]s in an [`Action::TryElse`] return errors.
646 #[error("Both Actions in a Action::TryElse returned errors.")]
647 TryElseError {
648 /// The error returned by [`Action::TryElse::try`].
649 try_error: Box<Self>,
650 /// The error returned by [`Action::TryElse::else`].
651 else_error: Box<Self>
652 },
653 /// Returned when all [`Action`]s in an [`Action::FirstNotError`] error.
654 #[error("All Actions in a Action::FirstNotError errored.")]
655 FirstNotErrorErrors(Vec<Self>),
656
657 /// Returned when a [`StringSource`] returns [`None`] where it has to return [`Some`].
658 #[error("A StringSource returned None where it had to return Some.")]
659 StringSourceIsNone,
660
661 /// Returned when a [`SetHostError`] is encountered.
662 #[error(transparent)]
663 SetHostError(#[from] SetHostError),
664 /// Returned when attempting to get the value of a query param from a URL with no query.
665 #[error("Attempted to get the value of a query param from a URL with no query.")]
666 NoQuery,
667 /// Returned when attempting to get the value of a query param that wasn't found.
668 #[error("Attempted to get the value of a query param that wasn't found.")]
669 QueryParamNotFound,
670 /// Returned when attempting to get the value of a query param that didn't have a value.
671 #[error("Attempted to get the value of a query param that didn't have a value.")]
672 QueryParamNoValue,
673 /// Returned when an [`Action`] with the specified name isn't found in the [`Commons::actions`].
674 #[error("A Action with the specified name wasn't found in the Commons::actions.")]
675 CommonActionNotFound,
676 /// Returned when a [`url::ParseError`] is encountered.
677 #[error(transparent)]
678 UrlParseError(#[from] url::ParseError),
679 /// Returned when a [`Utf8Error`] is encountered.
680 #[error(transparent)]
681 Utf8Error(#[from] Utf8Error),
682 /// Returned when a [`UrlPartSetError`] is encountered.
683 #[error(transparent)]
684 UrlPartSetError(#[from] UrlPartSetError),
685 /// Returned when a [`StringMatcherError`] is encountered.
686 #[error(transparent)]
687 StringMatcherError(#[from] StringMatcherError),
688 /// Returned when a [`StringSourceError`] is encountered.
689 #[error(transparent)]
690 StringSourceError(#[from] StringSourceError),
691 /// Returned when a [`StringModificationError`] is encountered.
692 #[error(transparent)]
693 StringModificationError(#[from] StringModificationError),
694 /// Returned when a [`ConditionError`] is encountered.
695 #[error(transparent)]
696 ConditionError(#[from] ConditionError),
697
698 /// Returned when a [`reqwest::Error`] is encountered.
699 #[cfg(feature = "http")]
700 #[error(transparent)]
701 ReqwestError(#[from] reqwest::Error),
702 /// Returned when a redirect's `Location` header isn't found.
703 #[cfg(feature = "http")]
704 #[error("The redirect's Location header wasn't found")]
705 LocationHeaderNotFound,
706
707 /// Returned when a [`reqwest::header::ToStrError`] is encountered.
708 #[cfg(feature = "http")]
709 #[error(transparent)]
710 ToStrError(#[from] reqwest::header::ToStrError),
711
712 /// Returned when attempting to get a URL from the cache but its value is [`None`].
713 #[cfg(feature = "cache")]
714 #[error("Attempted to get a URL from the cache but its value was None.")]
715 CachedUrlIsNone,
716 /// Returned when a [`ReadFromCacheError`] is encountered.
717 #[cfg(feature = "cache")]
718 #[error(transparent)]
719 ReadFromCacheError(#[from] ReadFromCacheError),
720 /// Returned when a [`WriteToCacheError`] is encountered.
721 #[cfg(feature = "cache")]
722 #[error(transparent)]
723 WriteToCacheError(#[from] WriteToCacheError),
724
725 /// Returned when a [`CommonCallArgsError`] is encountered.
726 #[error(transparent)]
727 CommonCallArgsError(#[from] CommonCallArgsError),
728 /// An arbitrary [`std::error::Error`] returned by [`Action::Custom`].
729 #[error(transparent)]
730 #[cfg(feature = "custom")]
731 Custom(Box<dyn std::error::Error + Send>)
732}
733
734impl Action {
735 /// Applies the specified variant of [`Self`].
736 ///
737 /// If an error is returned, `task_state` may be left in a partially modified state.
738 /// # Errors
739 /// See each variant of [`Self`] for when each variant returns an error.
740 #[allow(clippy::missing_panics_doc, reason = "Can't happen.")]
741 pub fn apply(&self, task_state: &mut TaskState) -> Result<(), ActionError> {
742 debug!(self, Action::apply, self, task_state);
743 match self {
744 // Testing.
745
746 Self::None => {},
747 Self::Error(msg) => Err(ActionError::ExplicitError(msg.clone()))?,
748 Self::Debug(action) => {
749 let old_url = task_state.url.clone();
750 let old_scratchpad = task_state.scratchpad.clone();
751 let action_result=action.apply(task_state);
752 eprintln!("=== Action::Debug ===\nAction: {action:?}\nOld URL: {old_url:?}\nOld scratchpad: {old_scratchpad:?}\nAction return value: {action_result:?}\nNew task_state: {task_state:?}");
753 action_result?;
754 },
755
756 // Logic.
757
758 Self::If {r#if, then, r#else} => if r#if.satisfied_by(&task_state.to_view())? {
759 then.apply(task_state)?;
760 } else if let Some(r#else) = r#else {
761 r#else.apply(task_state)?;
762 },
763 Self::All(actions) => {
764 for action in actions {
765 action.apply(task_state)?;
766 }
767 },
768 Self::PartMap {part , map} => if let Some(action) = map.get(part .get( task_state.url ) ) {action.apply(task_state)?},
769 Self::StringMap{value, map} => if let Some(action) = map.get(value.get(&task_state.to_view())?) {action.apply(task_state)?},
770 Self::Repeat{actions, limit} => {
771 let mut previous_url;
772 let mut previous_scratchpad;
773 for _ in 0..*limit {
774 previous_url = task_state.url.to_string();
775 previous_scratchpad = task_state.scratchpad.clone();
776 for action in actions {
777 action.apply(task_state)?;
778 }
779 if task_state.url == &previous_url && task_state.scratchpad == &previous_scratchpad {break;}
780 }
781 },
782 // Error handling.
783
784 Self::IgnoreError(action) => {let _ = action.apply(task_state);},
785 Self::TryElse{ r#try, r#else } => match r#try.apply(task_state) {
786 Ok(x) => x,
787 Err(try_error) => match r#else.apply(task_state) {
788 Ok(x) => x,
789 Err(else_error) => Err(ActionError::TryElseError {try_error: Box::new(try_error), else_error: Box::new(else_error)})?
790 }
791 },
792 Self::FirstNotError(actions) => {
793 let mut errors = Vec::new();
794 for action in actions {
795 match action.apply(task_state) {
796 Ok(()) => return Ok(()),
797 Err(e) => errors.push(e)
798 }
799 }
800 Err(ActionError::FirstNotErrorErrors(errors))?
801 },
802 Self::RevertOnError(action) => {
803 let old_url = task_state.url.clone();
804 let old_scratchpad = task_state.scratchpad.clone();
805 if let Err(e) = action.apply(task_state) {
806 *task_state.url = old_url;
807 *task_state.scratchpad = old_scratchpad;
808 Err(e)?;
809 }
810 },
811
812 // Query.
813
814 Self::RemoveQuery => task_state.url.set_query(None),
815 Self::RemoveQueryParam(name) => if let Some(query) = task_state.url.query() {
816 let mut new = String::new();
817 let name = get_string!(name, task_state, ActionError);
818 for param in query.split('&') {
819 if peh(param.split('=').next().expect("The first segment to always exist.")) != name {
820 if !new.is_empty() {new.push('&');}
821 new.push_str(param);
822 }
823 }
824 task_state.url.set_query(Some(&*new).filter(|x| !x.is_empty()));
825 },
826 Self::RemoveQueryParams(names) => if let Some(query) = task_state.url.query() {
827 let mut new = String::new();
828 for param in query.split('&') {
829 if !names.contains(&*peh(param.split('=').next().expect("The first segment to always exist."))) {
830 if !new.is_empty() {new.push('&');}
831 new.push_str(param);
832 }
833 }
834 task_state.url.set_query(Some(&*new).filter(|x| !x.is_empty()));
835 },
836 Self::AllowQueryParams(names) => if let Some(query) = task_state.url.query() {
837 let mut new = String::new();
838 for param in query.split('&') {
839 if names.contains(&*peh(param.split('=').next().expect("The first segment to always exist."))) {
840 if !new.is_empty() {new.push('&');}
841 new.push_str(param);
842 }
843 }
844 task_state.url.set_query(Some(&*new).filter(|x| !x.is_empty()));
845 },
846 Self::RemoveQueryParamsMatching(matcher) => if let Some(query) = task_state.url.query() {
847 let mut new = String::new();
848 for param in query.split('&') {
849 if !matcher.satisfied_by(Some(&*peh(param.split('=').next().expect("The first segment to always exist."))), &task_state.to_view())? {
850 if !new.is_empty() {new.push('&');}
851 new.push_str(param);
852 }
853 }
854 task_state.url.set_query(Some(&*new).filter(|x| !x.is_empty()));
855 },
856 Self::AllowQueryParamsMatching(matcher) => if let Some(query) = task_state.url.query() {
857 let mut new = String::new();
858 for param in query.split('&') {
859 if matcher.satisfied_by(Some(&*peh(param.split('=').next().expect("The first segment to always exist."))), &task_state.to_view())? {
860 if !new.is_empty() {new.push('&');}
861 new.push_str(param);
862 }
863 }
864 task_state.url.set_query(Some(&*new).filter(|x| !x.is_empty()));
865 },
866 Self::GetUrlFromQueryParam(name) => {
867 let task_state_view = task_state.to_view();
868 let name = name.get(&task_state_view)?.ok_or(ActionError::StringSourceIsNone)?;
869
870 match task_state.url.get_query_param(&name, 0) {
871 Some(Some(Some(new_url))) => {*task_state.url = BetterUrl::parse(&new_url)?;},
872 Some(Some(None)) => Err(ActionError::QueryParamNoValue)?,
873 Some(None) => Err(ActionError::QueryParamNotFound)?,
874 None => Err(ActionError::NoQuery)?
875 }
876 },
877
878 // Other parts.
879
880 Self::SetHost(new_host) => task_state.url.set_host(new_host.as_deref())?,
881 Self::Join(with) => *task_state.url=task_state.url.join(get_str!(with, task_state, ActionError))?.into(),
882
883 // Generic part handling.
884
885 Self::SetPart {part, value} => part.set(task_state.url, value.get(&task_state.to_view())?.map(Cow::into_owned).as_deref())?, // The deref is needed for borrow checking reasons.
886 Self::ModifyPart {part, modification} => {
887 let mut temp = part.get(task_state.url);
888 modification.apply(&mut temp, &task_state.to_view())?;
889 part.set(task_state.url, temp.map(Cow::into_owned).as_deref())?;
890 },
891 Self::ModifyPartIfSome {part, modification} => {
892 if let mut temp @ Some(_) = part.get(task_state.url) {
893 modification.apply(&mut temp, &task_state.to_view())?;
894 part.set(task_state.url, temp.map(Cow::into_owned).as_deref())?;
895 }
896 }
897 Self::CopyPart {from, to} => to.set(task_state.url, from.get(task_state.url).map(|x| x.into_owned()).as_deref())?,
898 Self::MovePart {from, to} => {
899 to.set(task_state.url, from.get(task_state.url).map(|x| x.into_owned()).as_deref())?;
900 from.set(task_state.url, None)?;
901 },
902
903 // Miscellaneous.
904
905 #[cfg(feature = "http")]
906 Self::ExpandRedirect {headers, http_client_config_diff} => {
907 #[cfg(feature = "cache")]
908 if task_state.params.read_cache {
909 if let Some(new_url) = task_state.cache.read("redirect", task_state.url.as_str())? {
910 *task_state.url = BetterUrl::parse(&new_url.ok_or(ActionError::CachedUrlIsNone)?)?;
911 return Ok(());
912 }
913 }
914 let response = task_state.to_view().http_client(http_client_config_diff.as_deref())?.get(task_state.url.as_str()).headers(headers.clone()).send()?;
915 let new_url = if response.status().is_redirection() {
916 BetterUrl::parse(std::str::from_utf8(response.headers().get("location").ok_or(ActionError::LocationHeaderNotFound)?.as_bytes())?)?
917 } else {
918 response.url().clone().into()
919 };
920 #[cfg(feature = "cache")]
921 if task_state.params.write_cache {
922 task_state.cache.write("redirect", task_state.url.as_str(), Some(new_url.as_str()))?;
923 }
924 *task_state.url=new_url;
925 },
926
927 Self::SetScratchpadFlag {name, value} => {
928 let name = get_string!(name, task_state, ActionError);
929 match value {
930 true => task_state.scratchpad.flags.insert( name),
931 false => task_state.scratchpad.flags.remove(&name)
932 };
933 },
934 Self::SetScratchpadVar {name, value} => match value.get(&task_state.to_view())?.map(Cow::into_owned) {
935 Some(value) => {let _ = task_state.scratchpad.vars.insert( get_string!(name, task_state, ActionError), value);}
936 None => {let _ = task_state.scratchpad.vars.remove(&get_string!(name, task_state, ActionError));}
937 },
938 Self::ModifyScratchpadVar {name, modification} => {
939 let name = get_string!(name, task_state, ActionError).to_owned();
940 let mut value = task_state.scratchpad.vars.get(&name).map(|x| Cow::Borrowed(&**x));
941 modification.apply(&mut value, &task_state.to_view())?;
942 match value {
943 Some(value) => {let _ = task_state.scratchpad.vars.insert(name, value.into_owned());},
944 None => {let _ = task_state.scratchpad.vars.remove(&name);}
945 }
946 },
947 #[cfg(feature = "cache")]
948 Self::CacheUrl {category, action} => {
949 let category = get_string!(category, task_state, ActionError);
950 if task_state.params.read_cache {
951 if let Some(new_url) = task_state.cache.read(&category, task_state.url.as_str())? {
952 *task_state.url = BetterUrl::parse(&new_url.ok_or(ActionError::CachedUrlIsNone)?)?;
953 return Ok(());
954 }
955 }
956 let old_url = task_state.url.to_string();
957 action.apply(task_state)?;
958 if task_state.params.write_cache {
959 task_state.cache.write(&category, &old_url, Some(task_state.url.as_str()))?;
960 }
961 },
962 Self::Common(common_call) => {
963 task_state.commons.actions.get(get_str!(common_call.name, task_state, ActionError)).ok_or(ActionError::CommonActionNotFound)?.apply(&mut TaskState {
964 common_args: Some(&common_call.args.build(&task_state.to_view())?),
965 url : task_state.url,
966 scratchpad : task_state.scratchpad,
967 context : task_state.context,
968 job_context: task_state.job_context,
969 params : task_state.params,
970 commons : task_state.commons,
971 #[cfg(feature = "cache")]
972 cache : task_state.cache
973 })?
974 },
975 #[cfg(feature = "custom")]
976 Self::Custom(function) => function(task_state)?
977 };
978 Ok(())
979 }
980}