1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
use fossick::Fossicker;
use futures::future::join_all;
pub use options::{PagefindInboundConfig, SearchOptions};
use wax::{Glob, WalkEntry};

use crate::index::build_indexes;

mod fossick;
mod fragments;
mod index;
mod options;
mod output;
pub mod serve;
mod utils;

/// State for one indexing run: the options it was configured with plus the
/// list of files discovered for processing.
pub struct SearchState {
    /// Configuration handed to `SearchState::new`; read by every step of the run.
    options: SearchOptions,
    /// One `Fossicker` per path matched by `walk_for_files`; empty until that
    /// method has been called.
    files: Vec<Fossicker>,
}

impl SearchState {
    /// Creates a state holding `options` and an empty file list.
    pub fn new(options: SearchOptions) -> Self {
        Self {
            files: Vec::new(),
            options,
        }
    }

    /// Replaces `self.files` with one `Fossicker` per path matched by the
    /// configured glob underneath the configured source directory.
    ///
    /// Entries for which the walker yields an error are skipped without any
    /// report. If the glob pattern itself fails to parse, an error message is
    /// written to stderr and the process exits with status 1.
    pub async fn walk_for_files(&mut self) {
        println!("Walking source directory...");

        // Guard clause: an unparseable glob is fatal for the whole process.
        let glob = match Glob::new(&self.options.glob) {
            Ok(parsed) => parsed,
            Err(_) => {
                eprintln!(
                    "Error: Provided glob \"{}\" did not parse as a valid glob.",
                    self.options.glob
                );
                std::process::exit(1)
            }
        };

        self.files = glob
            .walk(&self.options.source, usize::MAX)
            .filter_map(|entry| entry.ok())
            .map(|entry| Fossicker::new(entry.into_path()))
            .collect();
    }

    /// Runs the whole pipeline: prints the run configuration, walks the source
    /// directory, fossicks every discovered file, builds the indexes from the
    /// surviving pages and writes them out.
    ///
    /// When at least one page reports `has_custom_body`, pages without it are
    /// dropped. Pages whose `word_data` is empty are always dropped.
    pub async fn run(&mut self) {
        if self.options.verbose {
            println!("Running Pagefind v{} in verbose mode", self.options.version);
        } else {
            println!("Running Pagefind v{}", self.options.version);
        }
        println!("Running from: {:?}", self.options.working_directory);
        println!("Source:       {:?}", self.options.source);
        println!("Bundle Directory:  {:?}", self.options.bundle_dir);

        self.walk_for_files().await;
        println!("Building search indexes...");

        // Borrow the options once; `files` is a separate field, so it can be
        // mutably iterated while this shared borrow is alive.
        let options = &self.options;
        let fossicking = self.files.iter_mut().map(|file| file.fossick(options));
        let all_pages = join_all(fossicking).await;

        // A single page with a custom body switches the whole site into
        // "custom body only" mode.
        let used_custom_body = all_pages
            .iter()
            .flatten()
            .any(|page| page.has_custom_body);

        let body_notice = if used_custom_body {
            "Found a data-pagefind-body element on the site.\n↳ Ignoring pages without this tag."
        } else {
            "Did not find a data-pagefind-body element on the site.\n↳ Indexing all <body> elements on the site."
        };
        println!("{}", body_notice);

        let pages_with_data = all_pages.into_iter().flatten().filter(|page| {
            (!used_custom_body || page.has_custom_body) && !page.word_data.is_empty()
        });

        build_indexes(pages_with_data, options)
            .await
            .write_files(options)
            .await;
    }
}