1use std::io::Error;
2use std::path::PathBuf;
3use tokio::fs::File;
4use tokio::io::{AsyncReadExt, BufReader};
5use tokio::time::{sleep, Duration};
6
7use crate::{FixOptions, Globals, SitefixIssue};
8use parser::DomParser;
9
10use self::parser::DomParserResult;
11
12mod parser;
13
/// Output of [`Fossicker::fossick`] for a single file.
#[derive(Debug)]
pub struct FossickedData {
    /// Path of the file that was read, carried over from the `Fossicker`.
    pub file_path: PathBuf,
    /// Issues reported by the DOM parser for this file.
    pub issues: Vec<SitefixIssue>,
    /// Copied from the parser result; presumably true when an `<html>` element
    /// was seen in the file — TODO confirm against the parser.
    pub has_html_element: bool,
}
20
/// Reads one file from disk and runs it through the DOM parser.
#[derive(Debug)]
pub struct Fossicker {
    /// Path of the file to read.
    pub file_path: PathBuf,
    /// URLs for this file; initialised by `new` with the single URL derived
    /// from `file_path`.
    pub urls: Vec<String>,
    /// Parser output; `None` until the file has been read and parsed successfully.
    data: Option<DomParserResult>,
}
27
28impl Fossicker {
29 pub fn new(file_path: PathBuf, options: &FixOptions) -> Self {
30 Self {
31 urls: vec![build_url(&file_path, options)],
32 file_path,
33 data: None,
34 }
35 }
36
37 async fn read_file(&mut self, globals: &Globals, options: &FixOptions) -> Result<(), Error> {
38 let file = File::open(&self.file_path).await?;
39
40 let mut rewriter = DomParser::new(globals, options);
41
42 let mut br = BufReader::new(file);
43 let mut buf = [0; 20000];
44 while let Ok(read) = br.read(&mut buf).await {
45 if read == 0 {
46 break;
47 }
48 if let Err(error) = rewriter.write(&buf[..read]) {
49 println!(
50 "Failed to parse file {} — skipping this file. Error:\n{error}",
51 self.file_path.to_str().unwrap_or("[unknown file]")
52 );
53 return Ok(());
54 }
55 }
56
57 self.data = Some(rewriter.wrap());
58
59 Ok(())
60 }
61
62 pub async fn fossick(
63 mut self,
64 globals: &Globals,
65 options: &FixOptions,
66 ) -> Result<FossickedData, ()> {
67 while self.read_file(globals, options).await.is_err() {
68 sleep(Duration::from_millis(1)).await;
69 }
70
71 if self.data.is_none() {
72 return Err(());
73 }
74
75 let data = self.data.unwrap();
76 Ok(FossickedData {
77 file_path: self.file_path,
78 has_html_element: data.has_html_element,
79 issues: data.issues,
80 })
81 }
82}
83
84fn build_url(page_url: &PathBuf, options: &FixOptions) -> String {
85 let url = page_url
86 .strip_prefix(&options.source)
87 .expect("File was found that does not start with the source directory");
88
89 format!(
90 "/{}",
91 url.to_str().unwrap().to_owned().replace("index.html", "")
92 )
93}