rusty_scrap/
lib.rs

1use std::{
2    fs::{self, OpenOptions},
3    io::{BufRead, BufReader, Read, Write},
4    net::TcpStream,
5};
6
7use http_scrap::Response;
8use native_tls::TlsConnector;
9use regex::Regex;
10use ZuckGate::Ask;
11
/// Type-state builder holding the scrape target(s) and the text fetched from them.
///
/// `UrlState` and `BuildState` are marker parameters: no value of either type is ever
/// stored, they only select which `impl` block (and therefore which methods) applies.
pub struct Scrap<'url, UrlState, BuildState> {
    /// Single target URL; the empty string until one is supplied.
    url: &'url str,
    /// Borrowed list of target URLs; a one-element slice holding `""` until a list is supplied.
    urls: &'url [&'url str],
    /// Text collected by `build`; empty beforehand.
    response: String,
    /// Zero-sized marker for which kind of URL input has been provided.
    urlstate: std::marker::PhantomData<UrlState>,
    /// Zero-sized marker for the build stage.
    buildstate: std::marker::PhantomData<BuildState>,
}
19
// Markers for the `UrlState` parameter of `Scrap`.

/// No URL input has been supplied yet (the state produced by `Scrap::new`).
pub struct UrlNotPresent;
/// A single URL has been supplied through `Scrap::url`.
pub struct UrlPresent;
/// A list of URLs has been supplied through `Scrap::urls`.
pub struct UrlsPresent;
/// Counterpart of [`UrlsPresent`]; not referenced by any code visible in this file.
pub struct UrlsNotPresent;

// Markers for the `BuildState` parameter of `Scrap`.

/// The builder is still in its initial build stage.
pub struct NotBuild;
/// Marker for a finished build; no code visible in this file produces this state.
pub struct Built;
27
28impl<'url> Scrap<'url, UrlNotPresent, NotBuild> {
29    pub fn new() -> Self {
30        Self {
31            url: "",
32            urls: &[""],
33            response: String::new(),
34            urlstate: std::marker::PhantomData,
35            buildstate: std::marker::PhantomData,
36        }
37    }
38    pub fn url(mut self, url: &'url str) -> Scrap<'url, UrlPresent, NotBuild> {
39        self.url = url;
40        Scrap {
41            url: self.url,
42            urls: &[""],
43            response: self.response,
44            urlstate: std::marker::PhantomData,
45            buildstate: std::marker::PhantomData,
46        }
47    }
48    pub fn urls(mut self, urls: &'url [&'url str]) -> Scrap<'url, UrlsPresent, NotBuild> {
49        self.urls = urls;
50        Scrap {
51            url: self.url,
52            urls: self.urls,
53            response: self.response,
54            urlstate: std::marker::PhantomData,
55            buildstate: std::marker::PhantomData,
56        }
57    }
58}
59
60impl<'scrap> Scrap<'scrap, UrlsPresent, NotBuild> {
61    pub fn build(mut self) -> Self {
62        // let mut response = String::new();
63        for url in self.urls {
64            // println!("{}", url);
65            let ask = Ask::new()
66                .url(url)
67                .get()
68                .connection(ZuckGate::Connections::Close)
69                .ask();
70            let response = Response::new(&ask);
71            let ask = response.content();
72
73            // println!("{}", ask);s
74            self.response.push_str(&format!("{}\r\n", ask));
75        }
76        // println!("{}", response);
77        self
78    }
79    pub fn as_raw_html(&self) -> String {
80        self.response.clone()
81    }
82    pub fn element_values(&self) -> String {
83        let response = Regex::new(r"<[^>]*>")
84            .unwrap()
85            .replace_all(&self.response, "")
86            .replace("\n", "")
87            .replace("\t", "");
88        // let response: Vec<&str> = response.split_whitespace().collect();
89        // println!("{}", response);
90        // String::new()
91        response.to_string()
92        // response.to_string()
93    }
94    pub fn paragrah(&self) -> String {
95        let response = Regex::new(r#"<div>(.*?)<\/div>"#).unwrap();
96        let mut paragraph = String::new();
97        for ch in response.captures_iter(&self.response) {
98            let p = &ch[0];
99            paragraph.push_str(p);
100        }
101        // paragraph
102        response.to_string()
103    }
104    pub fn text_elements(&self) -> String {
105        let response = Regex::new(r#"<h[1-6]>(.*?)<\/h[1-6]>|<p>(.*?)<\/p>"#).unwrap();
106        let mut paragraph = String::new();
107        for ch in response.captures_iter(&self.response) {
108            let p = &ch[0];
109            paragraph.push_str(p);
110        }
111        paragraph
112    }
113}
114impl<'scrap> Scrap<'scrap, UrlPresent, NotBuild> {
115    pub fn build(mut self) -> Self {
116        let ask = Ask::new()
117            .url(&self.url)
118            .get()
119            .connection(ZuckGate::Connections::Close)
120            .ask();
121        let response = Response::new(&ask);
122        let content = response.content();
123        // println!("{}", content);
124        self.response = content.to_string();
125        self
126    }
127    pub fn as_raw_html(&self) -> String {
128        self.response.clone()
129    }
130    pub fn element_values(&self) -> String {
131        let response = Regex::new(r"<[^>]*>")
132            .unwrap()
133            .replace_all(&self.response, "")
134            .replace("\n", "")
135            .replace("\t", "");
136        // let response: Vec<&str> = response.split_whitespace().collect();
137        // println!("{}", response);
138        // String::new()
139        response.to_string()
140        // response.to_string()
141    }
142    pub fn paragrah(&self) -> String {
143        let response = Regex::new(r#"<div>(.*?)<\/div>"#).unwrap();
144        let mut paragraph = String::new();
145        for ch in response.captures_iter(&self.response) {
146            let p = &ch[0];
147            paragraph.push_str(p);
148        }
149        paragraph
150        // response.to_string()
151    }
152    pub fn text_elements(&self) -> String {
153        let response = Regex::new(r#"<h[1-6]>(.*?)<\/h[1-6]>|<p>(.*?)<\/p>"#).unwrap();
154        let mut paragraph = String::new();
155        for ch in response.captures_iter(&self.response) {
156            let p = &ch[0];
157            paragraph.push_str(p);
158        }
159        paragraph
160    }
161}
/// Extracts the host portion of `url`.
///
/// The host is the text after the first `https://` (or, when that is absent, after the
/// first `://`) up to, but not including, the next `/`. When no `/` follows, the rest of
/// the string is returned; when no scheme separator is present at all, the input is
/// treated as starting directly with the host. This function never panics.
pub fn url(url: &str) -> String {
    const HTTPS: &str = "https://";
    const SCHEME_SEPARATOR: &str = "://";

    // Prefer `https://` wherever it occurs, so inputs that already worked keep
    // producing exactly the same result.
    let after_scheme = if let Some(pos) = url.find(HTTPS) {
        &url[pos + HTTPS.len()..]
    } else if let Some(pos) = url.find(SCHEME_SEPARATOR) {
        &url[pos + SCHEME_SEPARATOR.len()..]
    } else {
        url
    };

    // `split` always yields at least one item, so a URL without a path
    // (e.g. `https://example.com`) resolves to the whole remainder.
    after_scheme
        .split('/')
        .next()
        .unwrap_or(after_scheme)
        .to_string()
}
172
173// trait/     pub fn build(self) -> Self;
174//     fn as_raw_html(&self) -> String {
175//         // self.response
176//         String::new()
177//     }
178//     fn element_values(&self) -> String {
179// let response = Regex::new(r"<[^>]*>")
180//     .unwrap()
181//     .replace_all(&self.response, "")
182//     .replace("\n", "")
183//     .replace("\t", "");
184// // let response: Vec<&str> = response.split_whitespace().collect();
185// // println!("{}", response);
186// // String::new()
187// response.to_string()
188//         String::new()
189//     }
190//     fn paragrah(&self) -> String {
191//         // let response = Regex::new(r#"<div>(.*?)<\/div>"#).unwrap();
192//         // let mut paragraph = String::new();
193//         // for ch in response.captures_iter(&self.response) {
194//         //     let p = &ch[0];
195//         //     paragraph.push_str(p);
196//         // }
197//         // paragraph
198//         String::new()
199//         // response.to_string()
200//     }
201//     fn text_elements(&self) -> String {
202//         // let response = Regex::new(r#"<h[1-6]>(.*?)<\/h[1-6]>|<p>(.*?)<\/p>"#).unwrap();
203//         // let mut paragraph = String::new();
204//         // for ch in response.captures_iter(&self.response) {
205//         //     let p = &ch[0];
206//         //     paragraph.push_str(p);
207//         // }
208//         // paragraph
209//         String::new()
210//     }
211// }