1use crate::utils::parse_url_with_raw_path;
2use crate::{event_handlers::Handles, statistics::StatError::UrlFormat, Command::AddError};
3use anyhow::{anyhow, bail, Result};
4use reqwest::Url;
5use std::collections::HashSet;
6use std::{fmt, sync::Arc};
7
/// Extension trait that adds scope-related checks to a url type
/// (implemented in this file for `Url`)
pub trait UrlExt {
    /// Report whether this url is considered in scope, given the list of
    /// urls in `scope`
    ///
    /// # Arguments
    /// * `scope` - urls this url is checked against
    fn is_in_scope(&self, scope: &[Url]) -> bool;

    /// Report whether this url's domain is a subdomain of `parent_url`'s domain
    ///
    /// # Arguments
    /// * `parent_url` - url whose domain is treated as the candidate parent
    fn is_subdomain_of(&self, parent_url: &Url) -> bool;
}
24
25impl UrlExt for Url {
26 fn is_in_scope(&self, scope: &[Url]) -> bool {
27 log::trace!("enter: is_in_scope({}, scope: {:?})", self.as_str(), scope);
28
29 if scope.is_empty() {
30 log::error!("is_in_scope check failed (scope is empty, this should not happen)");
31 log::trace!("exit: is_in_scope -> false");
32 return false;
33 }
34
35 for url in scope {
36 if self.host() == url.host() {
37 log::trace!("exit: is_in_scope -> true (same domain/host)");
38 return true;
39 }
40
41 if self.is_subdomain_of(url) {
42 log::trace!("exit: is_in_scope -> true (subdomain)");
43 return true;
44 }
45 }
46
47 log::trace!("exit: is_in_scope -> false");
48 false
49 }
50
51 fn is_subdomain_of(&self, parent_url: &Url) -> bool {
52 if let (Some(url_domain), Some(parent_domain)) = (self.domain(), parent_url.domain()) {
53 let candidate = url_domain.to_lowercase();
54 let candidate = candidate.trim_end_matches('.');
55
56 let parent = parent_domain.to_lowercase();
57 let parent = parent.trim_end_matches('.');
58
59 if candidate == parent {
60 return false;
62 }
63
64 let candidate_parts: Vec<&str> = candidate.split('.').collect();
65 let parent_parts: Vec<&str> = parent.split('.').collect();
66
67 if candidate_parts.len() <= parent_parts.len() {
68 return false;
70 }
71
72 candidate_parts
74 .iter()
75 .rev()
76 .zip(parent_parts.iter().rev())
77 .all(|(c, p)| c == p)
78 } else {
79 false
80 }
81 }
82}
83
/// A scan target stored as a string, paired with the shared handles used by
/// its methods to read configuration values and report statistics
#[derive(Debug)]
pub struct FeroxUrl {
    /// string representation of the target url
    pub target: String,

    /// shared handles; the methods of this type read `config` and send to `stats`
    handles: Arc<Handles>,
}
93
94impl FeroxUrl {
96 pub fn from_string(target: &str, handles: Arc<Handles>) -> Self {
98 Self {
99 handles,
100 target: String::from(target),
101 }
102 }
103
104 pub fn from_url(target: &Url, handles: Arc<Handles>) -> Self {
106 Self {
107 handles,
108 target: target.as_str().to_string(),
109 }
110 }
111
112 pub fn formatted_urls(
119 &self,
120 word: &str,
121 collected_extensions: HashSet<String>,
122 ) -> Result<Vec<Url>> {
123 log::trace!("enter: formatted_urls({word})");
124
125 let mut urls = vec![];
126
127 let slash = if self.handles.config.add_slash {
128 Some("/")
129 } else {
130 None
131 };
132
133 match self.format(word, slash) {
134 Ok(url) => urls.push(url),
136 Err(_) => self.handles.stats.send(AddError(UrlFormat))?,
137 }
138
139 for ext in self
140 .handles
141 .config
142 .extensions
143 .iter()
144 .chain(collected_extensions.iter())
145 {
146 match self.format(word, Some(ext)) {
147 Ok(url) => urls.push(url),
149 Err(_) => self.handles.stats.send(AddError(UrlFormat))?,
150 }
151 }
152 log::trace!("exit: formatted_urls -> {urls:?}");
153 Ok(urls)
154 }
155
156 pub fn format(&self, word: &str, extension: Option<&str>) -> Result<Url> {
160 log::trace!("enter: format({word}, {extension:?})");
161
162 if Url::parse(word).is_ok() {
163 let message = format!("word ({word}) from wordlist is a URL, skipping...");
171 log::warn!("{message}");
172 log::trace!("exit: format -> Err({message})");
173 bail!(message);
174 }
175
176 let url = if word.is_empty() {
184 self.target.to_string()
187 } else if !self.target.ends_with('/') {
188 format!("{}/", self.target)
189 } else {
190 self.target.to_string()
191 };
192
193 let mut word = if let Some(ext) = extension {
199 if ext == "/" {
202 format!("{word}/")
203 } else {
204 format!("{word}.{ext}")
205 }
206 } else {
207 String::from(word)
208 };
209
210 if word.starts_with("//") {
212 word = word.trim_start_matches('/').to_string();
220 };
221
222 let base_url = parse_url_with_raw_path(&url)?;
223 let mut joined = base_url.join(&word)?;
224
225 if !self.handles.config.queries.is_empty() {
226 joined
229 .query_pairs_mut()
230 .extend_pairs(self.handles.config.queries.iter());
231 }
232
233 log::trace!("exit: format_url -> {joined}");
234 Ok(joined)
235 }
236
237 pub fn normalize(&self) -> String {
241 log::trace!("enter: normalize");
242
243 let normalized = if self.target.ends_with('/') {
244 self.target.to_string()
245 } else {
246 format!("{}/", self.target)
247 };
248
249 log::trace!("exit: normalize -> {normalized}");
250 normalized
251 }
252
253 pub fn depth(&self) -> Result<usize> {
265 log::trace!("enter: get_depth");
266
267 let target = self.normalize();
268
269 let parsed = parse_url_with_raw_path(&target)?;
270 let parts = parsed
271 .path_segments()
272 .ok_or_else(|| anyhow!("No path segments found"))?;
273
274 let mut depth = 0;
276
277 for _ in parts {
278 depth += 1;
279 }
280
281 log::trace!("exit: get_depth -> {depth}");
282 Ok(depth)
283 }
284}
285
286impl fmt::Display for FeroxUrl {
288 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
290 write!(f, "{}", &self.target)
291 }
292}
293
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::Configuration;

    /// the target shared by most of the `FeroxUrl` tests
    const LOCALHOST: &str = "http://localhost";

    /// build a `FeroxUrl` for `target` backed by test handles; `config`, when
    /// given, replaces the default configuration
    fn ferox_url(target: &str, config: Option<Configuration>) -> FeroxUrl {
        let handles = Arc::new(Handles::for_testing(None, config.map(Arc::new)).0);
        FeroxUrl::from_string(target, handles)
    }

    /// parse a url that is known to be valid
    fn parsed(raw: &str) -> Url {
        Url::parse(raw).unwrap()
    }

    /// parse every entry of `raw` into a scope list
    fn scope_of(raw: &[&str]) -> Vec<Url> {
        raw.iter().map(|entry| parsed(entry)).collect()
    }

    #[test]
    /// a word without any extensions yields a single url with the word joined
    fn formatted_urls_no_extension_returns_base_url_with_word() {
        let urls = ferox_url(LOCALHOST, None)
            .formatted_urls("turbo", HashSet::new())
            .unwrap();

        assert_eq!(urls, [parsed("http://localhost/turbo")])
    }

    #[test]
    /// a word with one extension yields the bare word plus the word.extension
    fn formatted_urls_one_extension_returns_two_urls() {
        let config = Configuration {
            extensions: vec![String::from("js")],
            ..Default::default()
        };

        let urls = ferox_url(LOCALHOST, Some(config))
            .formatted_urls("turbo", HashSet::new())
            .unwrap();

        assert_eq!(
            urls,
            [
                parsed("http://localhost/turbo"),
                parsed("http://localhost/turbo.js")
            ]
        )
    }

    #[test]
    /// a word with n extensions yields n + 1 urls, in configuration order
    fn formatted_urls_multiple_extensions_returns_n_plus_one_urls() {
        let all_extensions = ["js", "php", "pdf", "tar.gz"];

        for count in 1..=all_extensions.len() {
            let active = &all_extensions[..count];

            let config = Configuration {
                extensions: active.iter().map(|ext| String::from(*ext)).collect(),
                ..Default::default()
            };

            let mut expected = vec![parsed("http://localhost/turbo")];
            expected.extend(
                active
                    .iter()
                    .map(|ext| parsed(&format!("http://localhost/turbo.{ext}"))),
            );

            let urls = ferox_url(LOCALHOST, Some(config))
                .formatted_urls("turbo", HashSet::new())
                .unwrap();

            assert_eq!(urls, expected);
        }
    }

    #[test]
    /// a base url without a trailing slash has a depth of 1
    fn depth_base_url_returns_1() {
        let depth = ferox_url("http://localhost", None).depth().unwrap();
        assert_eq!(depth, 1);
    }

    #[test]
    /// a base url with a trailing slash has a depth of 1
    fn depth_base_url_with_slash_returns_1() {
        let depth = ferox_url("http://localhost/", None).depth().unwrap();
        assert_eq!(depth, 1);
    }

    #[test]
    /// a url with one directory has a depth of 2
    fn depth_one_dir_returns_2() {
        let depth = ferox_url("http://localhost/src", None).depth().unwrap();
        assert_eq!(depth, 2);
    }

    #[test]
    /// a url with one directory and a trailing slash has a depth of 2
    fn depth_one_dir_with_slash_returns_2() {
        let depth = ferox_url("http://localhost/src/", None).depth().unwrap();
        assert_eq!(depth, 2);
    }

    #[test]
    /// base url + 1 word + no slash + no extension
    fn format_url_normal() {
        let formatted = ferox_url(LOCALHOST, None).format("stuff", None).unwrap();

        assert_eq!(formatted, parsed("http://localhost/stuff"));
    }

    #[test]
    /// base url + no word + no slash + no extension
    fn format_url_no_word() {
        let formatted = ferox_url(LOCALHOST, None).format("", None).unwrap();

        assert_eq!(formatted, parsed("http://localhost"));
    }

    #[test]
    /// base url + word + configured query parameters
    fn format_url_joins_queries() {
        let config = Configuration {
            queries: vec![(String::from("stuff"), String::from("things"))],
            ..Default::default()
        };

        let formatted = ferox_url(LOCALHOST, Some(config))
            .format("lazer", None)
            .unwrap();

        assert_eq!(formatted, parsed("http://localhost/lazer?stuff=things"));
    }

    #[test]
    /// base url + no word + configured query parameters
    fn format_url_without_word_joins_queries() {
        let config = Configuration {
            queries: vec![(String::from("stuff"), String::from("things"))],
            ..Default::default()
        };

        let formatted = ferox_url(LOCALHOST, Some(config))
            .format("", None)
            .unwrap();

        assert_eq!(formatted, parsed("http://localhost/?stuff=things"));
    }

    #[test]
    #[should_panic]
    /// no base url is an error; unwrapping it panics
    fn format_url_no_url() {
        ferox_url("", None).format("stuff", None).unwrap();
    }

    #[test]
    /// word prepended with a slash
    fn format_url_word_with_preslash() {
        let formatted = ferox_url(LOCALHOST, None).format("/stuff", None).unwrap();

        assert_eq!(formatted, parsed("http://localhost/stuff"));
    }

    #[test]
    /// word with an appended slash keeps the slash
    fn format_url_word_with_postslash() {
        let formatted = ferox_url(LOCALHOST, None).format("stuff/", None).unwrap();

        assert_eq!(formatted, parsed("http://localhost/stuff/"));
    }

    #[test]
    /// word with two prepended slashes does not discard the target's host
    fn format_url_word_with_two_prepended_slashes() {
        let formatted = ferox_url(LOCALHOST, None)
            .format("//upload/img", None)
            .unwrap();

        assert_eq!(formatted, parsed("http://localhost/upload/img"));
    }

    #[test]
    /// word with two prepended slashes plus an extension keeps the target's host
    fn format_url_word_with_two_prepended_slashes_and_extensions() {
        let url = ferox_url(LOCALHOST, None);

        for ext in ["rocks", "fun"] {
            let expected = parsed(&format!("http://localhost/upload/ferox.{ext}"));

            assert_eq!(url.format("//upload/ferox", Some(ext)).unwrap(), expected);
        }
    }

    #[test]
    /// a word that is itself a url is rejected
    fn format_url_word_that_is_a_url() {
        let formatted = ferox_url(LOCALHOST, None).format("http://schmocalhost", None);

        assert!(formatted.is_err());
    }

    #[test]
    /// with add_slash and extensions set, the slash is only applied to the
    /// bare word and each extension is appended without it
    fn formatted_urls_with_postslash_and_extensions() {
        let config = Configuration {
            add_slash: true,
            extensions: vec!["rocks".to_string(), "fun".to_string()],
            ..Default::default()
        };

        let urls = ferox_url(LOCALHOST, Some(config))
            .formatted_urls("ferox", HashSet::new())
            .unwrap_or_else(|err| panic!("{}", err));

        assert_eq!(urls.len(), 3);
        assert_eq!(
            urls,
            [
                parsed("http://localhost/ferox/"),
                parsed("http://localhost/ferox.rocks"),
                parsed("http://localhost/ferox.fun"),
            ]
        )
    }

    #[test]
    /// a url whose host appears in the scope list is in scope
    fn test_is_in_scope() {
        let scope = scope_of(&["http://localhost", "http://example.com"]);

        assert!(parsed("http://localhost").is_in_scope(&scope));
    }

    #[test]
    /// a subdomain of a scope entry is in scope
    fn test_is_in_scope_subdomain() {
        let scope = scope_of(&["http://localhost", "http://example.com"]);

        assert!(parsed("http://sub.localhost").is_in_scope(&scope));
    }

    #[test]
    /// a url unrelated to every scope entry is not in scope
    fn test_is_in_scope_not_in_scope() {
        let scope = scope_of(&["http://localhost", "http://example.com"]);

        assert!(!parsed("http://notinscope.com").is_in_scope(&scope));
    }

    #[test]
    /// nothing is in scope when the scope list is empty
    fn test_is_in_scope_empty_scope() {
        let scope = scope_of(&[]);

        assert!(!parsed("http://localhost").is_in_scope(&scope));
    }

    #[test]
    /// a url matching the only scope entry is in scope
    fn test_is_in_scope_domain_only_scope() {
        let scope = scope_of(&["http://example.com"]);

        assert!(parsed("http://example.com").is_in_scope(&scope));
    }

    #[test]
    /// a subdomain of the only scope entry is in scope
    fn test_is_in_scope_subdomain_domain_only_scope() {
        let scope = scope_of(&["http://example.com"]);

        assert!(parsed("http://sub.example.com").is_in_scope(&scope));
    }

    #[test]
    /// a url without a domain is not in scope
    fn test_is_in_scope_no_domain() {
        let scope = scope_of(&["http://example.com"]);

        assert!(!parsed("file:///path/to/file").is_in_scope(&scope));
    }

    #[test]
    /// a direct subdomain is recognized
    fn test_is_subdomain_of_true() {
        let parent = parsed("http://example.com");

        assert!(parsed("http://sub.example.com").is_subdomain_of(&parent));
    }

    #[test]
    /// a domain is not a subdomain of itself
    fn test_is_subdomain_of_same_domain() {
        let parent = parsed("http://example.com");

        assert!(!parsed("http://example.com").is_subdomain_of(&parent));
    }

    #[test]
    /// an unrelated domain is not a subdomain
    fn test_is_subdomain_of_different_domain() {
        let parent = parsed("http://example.com");

        assert!(!parsed("http://other.com").is_subdomain_of(&parent));
    }

    #[test]
    /// a subdomain several levels deep is recognized
    fn test_is_subdomain_of_multi_level() {
        let parent = parsed("http://example.com");

        assert!(parsed("http://deep.sub.example.com").is_subdomain_of(&parent));
    }

    #[test]
    /// a url without a domain is never a subdomain
    fn test_is_subdomain_of_no_domain() {
        let parent = parsed("http://example.com");

        assert!(!parsed("file:///path/to/file").is_subdomain_of(&parent));
    }

    #[test]
    /// nothing is a subdomain of a parent without a domain
    fn test_is_subdomain_of_parent_no_domain() {
        let parent = parsed("file:///path/to/file");

        assert!(!parsed("http://example.com").is_subdomain_of(&parent));
    }

    #[test]
    /// a url with a path is not in an empty scope
    fn test_is_not_in_empty_scope() {
        let scope = scope_of(&[]);

        assert!(!parsed("http://example.com/path").is_in_scope(&scope));
    }

    #[test]
    /// a subdomain url is not in an empty scope
    fn test_is_in_scope_subdomain_with_empty_scope() {
        let scope = scope_of(&[]);

        assert!(!parsed("http://sub.example.com").is_in_scope(&scope));
    }

    #[test]
    /// a url equal to the scope entry is in scope
    fn test_is_in_scope_scope_match() {
        let scope = scope_of(&["http://other.com"]);

        assert!(parsed("http://other.com").is_in_scope(&scope));
    }

    #[test]
    /// a url that differs from the scope entry is not in scope
    fn test_is_in_scope_not_allowed() {
        let scope = scope_of(&["http://other.com"]);

        assert!(!parsed("http://notallowed.com").is_in_scope(&scope));
    }

    #[test]
    /// a different domain is not in an empty scope
    fn test_is_in_scope_empty_scope_different_domain() {
        let scope = scope_of(&[]);

        assert!(!parsed("http://other.com").is_in_scope(&scope));
    }

    #[test]
    /// a subdomain of an allowed domain is in scope
    fn test_is_in_scope_subdomain_in_scope() {
        let scope = scope_of(&["http://allowed.com"]);

        assert!(parsed("http://sub.allowed.com").is_in_scope(&scope));
    }
}