sitefix/fossick/
mod.rs

1use std::io::Error;
2use std::path::PathBuf;
3use tokio::fs::File;
4use tokio::io::{AsyncReadExt, BufReader};
5use tokio::time::{sleep, Duration};
6
7use crate::{FixOptions, Globals, SitefixIssue};
8use parser::DomParser;
9
10use self::parser::DomParserResult;
11
12mod parser;
13
14#[derive(Debug)]
15pub struct FossickedData {
16    pub file_path: PathBuf,
17    pub issues: Vec<SitefixIssue>,
18    pub has_html_element: bool,
19}
20
21#[derive(Debug)]
22pub struct Fossicker {
23    pub file_path: PathBuf,
24    pub urls: Vec<String>,
25    data: Option<DomParserResult>,
26}
27
28impl Fossicker {
29    pub fn new(file_path: PathBuf, options: &FixOptions) -> Self {
30        Self {
31            urls: vec![build_url(&file_path, options)],
32            file_path,
33            data: None,
34        }
35    }
36
37    async fn read_file(&mut self, globals: &Globals, options: &FixOptions) -> Result<(), Error> {
38        let file = File::open(&self.file_path).await?;
39
40        let mut rewriter = DomParser::new(globals, options);
41
42        let mut br = BufReader::new(file);
43        let mut buf = [0; 20000];
44        while let Ok(read) = br.read(&mut buf).await {
45            if read == 0 {
46                break;
47            }
48            if let Err(error) = rewriter.write(&buf[..read]) {
49                println!(
50                    "Failed to parse file {} — skipping this file. Error:\n{error}",
51                    self.file_path.to_str().unwrap_or("[unknown file]")
52                );
53                return Ok(());
54            }
55        }
56
57        self.data = Some(rewriter.wrap());
58
59        Ok(())
60    }
61
62    pub async fn fossick(
63        mut self,
64        globals: &Globals,
65        options: &FixOptions,
66    ) -> Result<FossickedData, ()> {
67        while self.read_file(globals, options).await.is_err() {
68            sleep(Duration::from_millis(1)).await;
69        }
70
71        if self.data.is_none() {
72            return Err(());
73        }
74
75        let data = self.data.unwrap();
76        Ok(FossickedData {
77            file_path: self.file_path,
78            has_html_element: data.has_html_element,
79            issues: data.issues,
80        })
81    }
82}
83
84fn build_url(page_url: &PathBuf, options: &FixOptions) -> String {
85    let url = page_url
86        .strip_prefix(&options.source)
87        .expect("File was found that does not start with the source directory");
88
89    format!(
90        "/{}",
91        url.to_str().unwrap().to_owned().replace("index.html", "")
92    )
93}