#![allow(unused)]

pub mod wiki_info;

#[cfg(test)]
mod tests {
    use crate::wiki_info::{
        clean_document, cosine_sim, get_page_similarity, page_from_url,
        url_utils::{self, resolve_wiki_url},
    };

    use super::wiki_info::{get_most_similar_page, page_from_title, Page};

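    // NOTE: most of the tests below fetch live pages from en.wikipedia.org, so they
    // need network access and may be slow or flaky if Wikipedia is unreachable or
    // page content changes.

    // Fetching a page by title should preserve that title on the returned Page.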
    #[test]
    fn test_page_from_title() {
        let page = page_from_title("Paris").unwrap();
        assert_eq!(page.title, "Paris");
    }

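    // Of the candidate pages, "France" should be the one most similar to "Paris".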
    #[test]
    fn test_get_most_sim_pages() {
        let main: Page = page_from_title("Paris").unwrap();

        let pages_to_check: Vec<Page> = vec![
            "France",
            "European Union",
            "World War I",
            "Prime Minister of France",
        ]
        .iter()
        .map(|title| page_from_title(title).unwrap())
        .collect();

        let most_similar_page = get_most_similar_page(&main, &pages_to_check);

        assert_eq!(pages_to_check[most_similar_page].title, "France");
    }

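    // Two fetches of the same article should score as near-identical (> 0.98).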
    #[test]
    fn test_same_documents_eq() {
        let page = page_from_title("Prime Minister of France").unwrap();
        let same_page = page_from_title("Prime Minister of France").unwrap();

        let sim_score = get_page_similarity(&page, &same_page);

        assert!(
            sim_score > 0.98,
            "Same page should be practically identical"
        );
    }

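    // A plain title should resolve to its canonical en.wikipedia.org URL.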
    #[test]
    fn test_url_resolve() {
        assert_eq!(
            resolve_wiki_url("Prime Minister of France").unwrap(),
            "https://en.wikipedia.org/wiki/Prime_Minister_of_France"
        );
    }

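    // Unrelated articles should produce a low similarity score (< 0.2).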
    #[test]
    fn test_doc_difference() {
        let page1 =
            page_from_url("https://en.wikipedia.org/wiki/Prime_Minister_of_France").unwrap();

        let page2 = page_from_url("https://en.wikipedia.org/wiki/The_Dark_Tower_(series)").unwrap();

        let sim = get_page_similarity(&page1, &page2);
        assert!(sim < 0.2, "Pages should differ significantly");
    }

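    // Following an outlink from a fetched page should yield another page with a
    // non-empty title.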
    #[test]
    fn test_traversal() {
        let starting_url = "https://en.wikipedia.org/wiki/The_Dark_Tower_(series)";
        let starting_page = page_from_url(starting_url).unwrap();

        let link_num = 19;
        let target_link = &starting_page.links[link_num].outlink;

        let target_page = page_from_url(target_link).unwrap();

        println!("Title of {}th link: {}", link_num, target_page.title);

        assert!(
            !target_page.title.is_empty(),
            "The target page should have a title."
        );
    }

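    // clean_document should strip common English stop words from fetched content.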
    #[test]
    fn test_clean_doc() {
        let starting_url = "https://en.wikipedia.org/wiki/The_Dark_Tower_(series)";
        let starting_page = page_from_url(starting_url).unwrap();

        let cleaned = clean_document(&starting_page);

        println!("Content: {:?}", cleaned);

        let words: Vec<&str> = cleaned.content.split_whitespace().collect();

        assert!(
            !words.contains(&"the"),
            "Should not contain common stop word 'the'"
        );
        assert!(
            !words.contains(&"and"),
            "Should not contain common stop word 'and'"
        );
        assert!(
            !words.contains(&"a"),
            "Should not contain common stop word 'a'"
        );
    }

    use scraper::Html;

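    // An underscored title with parentheses should resolve to its display form.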
    #[test]
    fn test_page_from_title_spec_chars() {
        let title = "Rust_(programming_language)";
        let page = page_from_title(title).unwrap();
        assert_eq!(page.title, "Rust (programming language)");
        assert!(!page.content.is_empty());
        assert!(!page.links.is_empty());
    }

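    // Fetching directly by URL should populate the title, content, and links.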
    #[test]
    fn test_page_from_url() {
        let url = "https://en.wikipedia.org/wiki/Rust_(programming_language)";
        let page = page_from_url(url).unwrap();
        assert_eq!(page.title, "Rust (programming language)");
        assert!(!page.content.is_empty());
        assert!(!page.links.is_empty());
    }

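    // clean_meta_content should collapse runs of whitespace and trim the ends.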
    #[test]
    fn test_clean_meta_content() {
        let raw_content = " Some content \nwith \n\nlots of whitespace. ";
        let cleaned = crate::wiki_info::clean_meta_content(raw_content);
        assert_eq!(cleaned, "Some content with lots of whitespace.");
    }

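    // process_content should extract paragraph text and turn relative /wiki/ hrefs
    // into absolute URLs; exercised here against a minimal in-memory HTML document.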
    #[test]
    fn test_process_content() {
        let html = r#"
            <html>
                <body>
                    <div class="mw-content-container">
                        <main id="content">
                            <p>This is a test paragraph with <a href="/wiki/Test_Link">a link</a>.</p>
                        </main>
                    </div>
                </body>
            </html>
        "#;
        let document = Html::parse_document(html);
        let selector = scraper::Selector::parse("div.mw-content-container main#content").unwrap();
        let element = document.select(&selector).next().unwrap();

        let page = crate::wiki_info::process_content(element, "Test Page");
        assert_eq!(page.title, "Test Page");
        assert!(page.content.contains("This is a test paragraph"));
        assert_eq!(page.links.len(), 1);
        assert_eq!(page.links[0].title, "a link");
        assert_eq!(
            page.links[0].outlink,
            "https://en.wikipedia.org/wiki/Test_Link"
        );
    }

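    // clean_document should drop stop words ("the") while keeping content words ("quick").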
    #[test]
    fn test_clean_document() {
        let page = Page {
            title: "Test Page".to_string(),
            content: "The quick brown fox jumps over the lazy dog.".to_string(),
            links: vec![],
        };
        let cleaned = clean_document(&page);
        assert!(cleaned.content.contains("quick"));
        assert!(!cleaned.content.contains("the"));
    }

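    // Sanity check of cosine_sim on hand-picked vectors; their cosine is exactly 0.8.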
    #[test]
    fn test_cosine_similarity() {
        let vec1 = vec![1.0, 0.5, 0.0];
        let vec2 = vec![0.5, 1.0, 0.0];
        let sim = cosine_sim(&vec1, &vec2);
        assert!(sim > 0.7);
    }

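    // Pages that share most of their words should score above 0.5.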
    #[test]
    fn test_get_page_similarity() {
        let page1 = Page {
            title: "Page 1".to_string(),
            content: "The quick brown fox jumps over the lazy dog.".to_string(),
            links: vec![],
        };
        let page2 = Page {
            title: "Page 2".to_string(),
            content: "The quick brown cat sleeps under the lazy dog.".to_string(),
            links: vec![],
        };
        let similarity = get_page_similarity(&page1, &page2);
        assert!(similarity > 0.5);
    }

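    // title_from_url should recover the human-readable title from a wiki URL.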
    #[test]
    fn test_url_utils_title_from_url() {
        let url = "https://en.wikipedia.org/wiki/Rust_(programming_language)";
        let title = url_utils::title_from_url(url);
        assert_eq!(title, "Rust (programming language)");
    }

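    // Resolving an underscored title should yield an en.wikipedia.org/wiki/ URL.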
    #[test]
    fn test_url_utils_resolve_wiki_url() {
        let title = "Rust_(programming_language)";
        let resolved_url = url_utils::resolve_wiki_url(title).unwrap();

        println!("RESOLVED URL: {:?}", resolved_url);

        assert!(resolved_url.contains("https://en.wikipedia.org/wiki/"));
    }

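    // Input that cannot be resolved to a wiki page should produce an error.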
    #[test]
    fn test_invalid_url() {
        let invalid_url = "not-a-url";
        let result = url_utils::resolve_wiki_url(invalid_url);
        assert!(result.is_err(), "Expected error for invalid URL");
    }

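    // A nonexistent article should surface the non-200 HTTP status as an error.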
    #[test]
    fn test_non_200_status() {
        let mock_url = "https://en.wikipedia.org/wiki/NonexistentPage404";
        let result = url_utils::resolve_wiki_url(mock_url);
        assert!(result.is_err(), "Expected error for non-200 HTTP status");
        if let Err(e) = result {
            assert!(
                e.to_string().contains("URL returned status"),
                "Unexpected error message: {}",
                e
            );
        }
    }
}