linkedin_profile_validator/
lib.rs1use regex::Regex;
40use thiserror::Error;
41use url::Url;
42
43#[derive(Error, Debug)]
45pub enum LinkedInUrlError {
46 #[error("Invalid URL format: {0}")]
48 InvalidUrl(String),
49
50 #[error("Not a LinkedIn URL")]
52 NotLinkedInUrl,
53
54 #[error("Not a LinkedIn profile URL")]
56 NotProfileUrl,
57
58 #[error("Network error: {0}")]
60 NetworkError(#[from] reqwest::Error),
61
62 #[error("Profile not found (404)")]
64 ProfileNotFound,
65
66 #[error("Unable to verify - LinkedIn requires authentication")]
68 AuthenticationRequired,
69}
70
71pub struct LinkedInValidator {
82 client: reqwest::blocking::Client,
83}
84
85impl LinkedInValidator {
86 #[must_use]
92 pub fn new() -> Self {
93 let client = reqwest::blocking::Client::builder()
94 .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
95 .timeout(std::time::Duration::from_secs(10))
96 .build()
97 .unwrap();
98
99 Self { client }
100 }
101
102 pub fn is_valid_linkedin_profile_url(&self, url_str: &str) -> Result<bool, LinkedInUrlError> {
137 let url = Url::parse(url_str).map_err(|e| LinkedInUrlError::InvalidUrl(e.to_string()))?;
138
139 if !is_linkedin_domain(&url) {
140 return Err(LinkedInUrlError::NotLinkedInUrl);
141 }
142
143 if !is_profile_path(&url) {
144 return Err(LinkedInUrlError::NotProfileUrl);
145 }
146
147 self.check_profile_exists(url_str)?;
148
149 Ok(true)
150 }
151
152 fn check_profile_exists(&self, url: &str) -> Result<(), LinkedInUrlError> {
153 let mut response = self.client.get(url).send()?;
154
155 if response.status().as_u16() == 999 {
158 response = self.client.get(url).header("Cookie", "sl=v=1&1").send()?;
160 }
161
162 let final_url = response.url().to_string();
164 if final_url.contains("/404/") || final_url.contains("linkedin.com/404") {
165 return Err(LinkedInUrlError::ProfileNotFound);
166 }
167
168 let body = response.text()?;
170
171 if body.contains("/authwall") || body.contains("sessionRedirect") {
173 return Err(LinkedInUrlError::AuthenticationRequired);
175 }
176
177 if body.contains("This page doesn't exist")
179 || body.contains("This page doesn't exist")
180 || body.contains("Page not found")
181 || body.contains("Check the URL or return to LinkedIn home")
182 || body.contains("return to LinkedIn home")
183 || body.contains("Go to your feed") && body.contains("doesn't exist")
184 {
185 return Err(LinkedInUrlError::ProfileNotFound);
186 }
187
188 Ok(())
189 }
190}
191
192fn is_linkedin_domain(url: &Url) -> bool {
193 matches!(url.domain(), Some(domain) if domain == "linkedin.com" || domain == "www.linkedin.com")
194}
195
196fn is_profile_path(url: &Url) -> bool {
197 let path = url.path();
198 let profile_regex = Regex::new(r"^/in/[a-zA-Z0-9\-]+/?$").unwrap();
199 profile_regex.is_match(path)
200}
201
202impl Default for LinkedInValidator {
203 fn default() -> Self {
204 Self::new()
205 }
206}
207
208pub async fn validate_linkedin_url_async(url: &str) -> Result<bool, LinkedInUrlError> {
245 let url_parsed = Url::parse(url).map_err(|e| LinkedInUrlError::InvalidUrl(e.to_string()))?;
246
247 if !is_linkedin_domain(&url_parsed) {
248 return Err(LinkedInUrlError::NotLinkedInUrl);
249 }
250
251 if !is_profile_path(&url_parsed) {
252 return Err(LinkedInUrlError::NotProfileUrl);
253 }
254
255 let client = reqwest::Client::builder()
256 .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
257 .timeout(std::time::Duration::from_secs(10))
258 .build()?;
259
260 let mut response = client.get(url).send().await?;
261
262 if response.status().as_u16() == 999 {
264 response = client.get(url).header("Cookie", "sl=v=1&1").send().await?;
266 }
267
268 let final_url = response.url().to_string();
270 if final_url.contains("/404/") || final_url.contains("linkedin.com/404") {
271 return Err(LinkedInUrlError::ProfileNotFound);
272 }
273
274 let body = response.text().await?;
276
277 if body.contains("/authwall") || body.contains("sessionRedirect") {
279 return Err(LinkedInUrlError::AuthenticationRequired);
280 }
281
282 if body.contains("This page doesn't exist")
284 || body.contains("This page doesn't exist")
285 || body.contains("Page not found")
286 || body.contains("Check the URL or return to LinkedIn home")
287 || body.contains("return to LinkedIn home")
288 || body.contains("Go to your feed") && body.contains("doesn't exist")
289 {
290 return Err(LinkedInUrlError::ProfileNotFound);
291 }
292
293 Ok(true)
294}
295
296#[must_use]
320pub fn is_valid_linkedin_profile_format(url: &str) -> bool {
321 let Ok(url_parsed) = Url::parse(url) else {
322 return false;
323 };
324
325 is_linkedin_domain(&url_parsed) && is_profile_path(&url_parsed)
326}
327
#[cfg(test)]
mod tests {
    use super::*;

    /// Profile expected to exist; used by the sync and async network tests.
    const EXISTING_PROFILE: &str = "https://www.linkedin.com/in/hamze/";
    /// Profile expected to be missing; used by the network and debug tests.
    const MISSING_PROFILE: &str = "https://www.linkedin.com/in/hamzeghalebi/";

    /// Accepts a successful validation or an auth wall; anything else fails the test.
    fn expect_valid_or_auth_wall(outcome: Result<bool, LinkedInUrlError>) {
        match outcome {
            Ok(true) => (),
            Ok(false) => panic!("Expected profile to be valid"),
            Err(LinkedInUrlError::AuthenticationRequired) => {
                println!("LinkedIn requires authentication - cannot verify profile existence");
            }
            Err(e) => panic!("Expected profile to be valid or require auth, got error: {e}"),
        }
    }

    /// Accepts "not found", an auth wall, or an inconclusive success (with a warning);
    /// any other error fails the test.
    fn expect_missing_or_unverifiable(outcome: Result<bool, LinkedInUrlError>) {
        match outcome {
            Ok(_) => {
                println!("Warning: LinkedIn allowed access to profile page - cannot determine if profile actually exists");
            }
            Err(LinkedInUrlError::ProfileNotFound) => (),
            Err(LinkedInUrlError::AuthenticationRequired) => {
                println!("LinkedIn requires authentication - cannot verify profile existence");
            }
            Err(e) => panic!("Expected ProfileNotFound or AuthenticationRequired error, got: {e}"),
        }
    }

    #[test]
    fn test_valid_profile_format() {
        // Format checks are offline, so both profiles pass regardless of existence.
        let accepted = [
            "https://www.linkedin.com/in/hamze/",
            "https://www.linkedin.com/in/hamzeghalebi/",
            "https://www.linkedin.com/in/johndoe",
            "https://linkedin.com/in/jane-doe",
            "https://www.linkedin.com/in/john-doe-123/",
        ];

        for url in &accepted {
            assert!(
                is_valid_linkedin_profile_format(url),
                "expected {url} to be accepted"
            );
        }
    }

    #[test]
    fn test_invalid_profile_format() {
        let rejected = [
            "https://www.google.com/in/johndoe",
            "https://linkedin.com/company/microsoft",
            "https://linkedin.com/",
            "not-a-url",
        ];

        for url in &rejected {
            assert!(
                !is_valid_linkedin_profile_format(url),
                "expected {url} to be rejected"
            );
        }
    }

    #[test]
    fn test_real_valid_profile() {
        let validator = LinkedInValidator::new();
        expect_valid_or_auth_wall(validator.is_valid_linkedin_profile_url(EXISTING_PROFILE));
    }

    #[test]
    fn test_real_invalid_profile() {
        let validator = LinkedInValidator::new();
        expect_missing_or_unverifiable(validator.is_valid_linkedin_profile_url(MISSING_PROFILE));
    }

    #[tokio::test]
    async fn test_async_valid_profile() {
        expect_valid_or_auth_wall(validate_linkedin_url_async(EXISTING_PROFILE).await);
    }

    #[tokio::test]
    async fn test_async_invalid_profile() {
        expect_missing_or_unverifiable(validate_linkedin_url_async(MISSING_PROFILE).await);
    }

    /// Prints the raw response for manual inspection; ignored in normal test runs.
    #[test]
    #[ignore = "Debug test to inspect LinkedIn response"]
    fn debug_linkedin_response() {
        let client = reqwest::blocking::Client::builder()
            .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
            .timeout(std::time::Duration::from_secs(10))
            .build()
            .unwrap();

        let response = client.get(MISSING_PROFILE).send().unwrap();

        println!("Status: {}", response.status());
        println!("Final URL: {}", response.url());

        let body = response.text().unwrap();
        println!("Body length: {}", body.len());

        let preview: String = body.chars().take(2000).collect();
        println!("First 2000 chars:\n{}", preview);
    }
}