linkedin_profile_validator/
lib.rs1use regex::Regex;
40use thiserror::Error;
41use url::Url;
42
43#[derive(Error, Debug)]
45pub enum LinkedInUrlError {
46    #[error("Invalid URL format: {0}")]
48    InvalidUrl(String),
49
50    #[error("Not a LinkedIn URL")]
52    NotLinkedInUrl,
53
54    #[error("Not a LinkedIn profile URL")]
56    NotProfileUrl,
57
58    #[error("Network error: {0}")]
60    NetworkError(#[from] reqwest::Error),
61
62    #[error("Profile not found (404)")]
64    ProfileNotFound,
65
66    #[error("Unable to verify - LinkedIn requires authentication")]
68    AuthenticationRequired,
69}
70
71pub struct LinkedInValidator {
82    client: reqwest::blocking::Client,
83}
84
85impl LinkedInValidator {
86    #[must_use]
92    pub fn new() -> Self {
93        let client = reqwest::blocking::Client::builder()
94            .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
95            .timeout(std::time::Duration::from_secs(10))
96            .build()
97            .unwrap();
98
99        Self { client }
100    }
101
102    pub fn is_valid_linkedin_profile_url(&self, url_str: &str) -> Result<bool, LinkedInUrlError> {
137        let url = Url::parse(url_str).map_err(|e| LinkedInUrlError::InvalidUrl(e.to_string()))?;
138
139        if !is_linkedin_domain(&url) {
140            return Err(LinkedInUrlError::NotLinkedInUrl);
141        }
142
143        if !is_profile_path(&url) {
144            return Err(LinkedInUrlError::NotProfileUrl);
145        }
146
147        self.check_profile_exists(url_str)?;
148
149        Ok(true)
150    }
151
152    fn check_profile_exists(&self, url: &str) -> Result<(), LinkedInUrlError> {
153        let mut response = self.client.get(url).send()?;
154
155        if response.status().as_u16() == 999 {
158            response = self.client.get(url).header("Cookie", "sl=v=1&1").send()?;
160        }
161
162        let final_url = response.url().to_string();
164        if final_url.contains("/404/") || final_url.contains("linkedin.com/404") {
165            return Err(LinkedInUrlError::ProfileNotFound);
166        }
167
168        let body = response.text()?;
170
171        if body.contains("/authwall") || body.contains("sessionRedirect") {
173            return Err(LinkedInUrlError::AuthenticationRequired);
175        }
176
177        if body.contains("This page doesn't exist")
179            || body.contains("This page doesn't exist")
180            || body.contains("Page not found")
181            || body.contains("Check the URL or return to LinkedIn home")
182            || body.contains("return to LinkedIn home")
183            || body.contains("Go to your feed") && body.contains("doesn't exist")
184        {
185            return Err(LinkedInUrlError::ProfileNotFound);
186        }
187
188        Ok(())
189    }
190}
191
192fn is_linkedin_domain(url: &Url) -> bool {
193    matches!(url.domain(), Some(domain) if domain == "linkedin.com" || domain == "www.linkedin.com")
194}
195
196fn is_profile_path(url: &Url) -> bool {
197    let path = url.path();
198    let profile_regex = Regex::new(r"^/in/[a-zA-Z0-9\-]+/?$").unwrap();
199    profile_regex.is_match(path)
200}
201
202impl Default for LinkedInValidator {
203    fn default() -> Self {
204        Self::new()
205    }
206}
207
208pub async fn validate_linkedin_url_async(url: &str) -> Result<bool, LinkedInUrlError> {
245    let url_parsed = Url::parse(url).map_err(|e| LinkedInUrlError::InvalidUrl(e.to_string()))?;
246
247    if !is_linkedin_domain(&url_parsed) {
248        return Err(LinkedInUrlError::NotLinkedInUrl);
249    }
250
251    if !is_profile_path(&url_parsed) {
252        return Err(LinkedInUrlError::NotProfileUrl);
253    }
254
255    let client = reqwest::Client::builder()
256        .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
257        .timeout(std::time::Duration::from_secs(10))
258        .build()?;
259
260    let mut response = client.get(url).send().await?;
261
262    if response.status().as_u16() == 999 {
264        response = client.get(url).header("Cookie", "sl=v=1&1").send().await?;
266    }
267
268    let final_url = response.url().to_string();
270    if final_url.contains("/404/") || final_url.contains("linkedin.com/404") {
271        return Err(LinkedInUrlError::ProfileNotFound);
272    }
273
274    let body = response.text().await?;
276
277    if body.contains("/authwall") || body.contains("sessionRedirect") {
279        return Err(LinkedInUrlError::AuthenticationRequired);
280    }
281
282    if body.contains("This page doesn't exist")
284        || body.contains("This page doesn't exist")
285        || body.contains("Page not found")
286        || body.contains("Check the URL or return to LinkedIn home")
287        || body.contains("return to LinkedIn home")
288        || body.contains("Go to your feed") && body.contains("doesn't exist")
289    {
290        return Err(LinkedInUrlError::ProfileNotFound);
291    }
292
293    Ok(true)
294}
295
296#[must_use]
320pub fn is_valid_linkedin_profile_format(url: &str) -> bool {
321    let Ok(url_parsed) = Url::parse(url) else {
322        return false;
323    };
324
325    is_linkedin_domain(&url_parsed) && is_profile_path(&url_parsed)
326}
327
#[cfg(test)]
mod tests {
    use super::*;

    /// URL used by the live tests that expect a valid profile.
    const VALID_PROFILE: &str = "https://www.linkedin.com/in/hamze/";
    /// URL used by the live tests that expect a missing profile.
    const INVALID_PROFILE: &str = "https://www.linkedin.com/in/hamzeghalebi/";

    /// Accepts `Ok(true)` or an authentication wall; panics on anything else.
    #[track_caller]
    fn expect_valid_or_auth_wall(outcome: Result<bool, LinkedInUrlError>) {
        match outcome {
            Ok(true) => (),
            Ok(false) => panic!("Expected profile to be valid"),
            Err(LinkedInUrlError::AuthenticationRequired) => {
                println!("LinkedIn requires authentication - cannot verify profile existence");
            }
            Err(e) => panic!("Expected profile to be valid or require auth, got error: {e}"),
        }
    }

    /// Accepts "not found", an authentication wall, or (with a warning) any
    /// `Ok` result; panics on every other error.
    #[track_caller]
    fn expect_missing_or_unverifiable(outcome: Result<bool, LinkedInUrlError>) {
        match outcome {
            Ok(_) => {
                println!("Warning: LinkedIn allowed access to profile page - cannot determine if profile actually exists");
            }
            Err(LinkedInUrlError::ProfileNotFound) => (),
            Err(LinkedInUrlError::AuthenticationRequired) => {
                println!("LinkedIn requires authentication - cannot verify profile existence");
            }
            Err(e) => panic!("Expected ProfileNotFound or AuthenticationRequired error, got: {e}"),
        }
    }

    #[test]
    fn test_valid_profile_format() {
        let accepted = [
            "https://www.linkedin.com/in/hamze/",
            "https://www.linkedin.com/in/hamzeghalebi/",
            "https://www.linkedin.com/in/johndoe",
            "https://linkedin.com/in/jane-doe",
            "https://www.linkedin.com/in/john-doe-123/",
        ];

        for candidate in accepted {
            assert!(is_valid_linkedin_profile_format(candidate));
        }
    }

    #[test]
    fn test_invalid_profile_format() {
        let rejected = [
            "https://www.google.com/in/johndoe",
            "https://linkedin.com/company/microsoft",
            "https://linkedin.com/",
            "not-a-url",
        ];

        for candidate in rejected {
            assert!(!is_valid_linkedin_profile_format(candidate));
        }
    }

    // The remaining tests issue real HTTP requests to linkedin.com.

    #[test]
    fn test_real_valid_profile() {
        let validator = LinkedInValidator::new();
        expect_valid_or_auth_wall(validator.is_valid_linkedin_profile_url(VALID_PROFILE));
    }

    #[test]
    fn test_real_invalid_profile() {
        let validator = LinkedInValidator::new();
        expect_missing_or_unverifiable(validator.is_valid_linkedin_profile_url(INVALID_PROFILE));
    }

    #[tokio::test]
    async fn test_async_valid_profile() {
        expect_valid_or_auth_wall(validate_linkedin_url_async(VALID_PROFILE).await);
    }

    #[tokio::test]
    async fn test_async_invalid_profile() {
        expect_missing_or_unverifiable(validate_linkedin_url_async(INVALID_PROFILE).await);
    }

    /// Prints the status, final URL and the start of the body of a raw request,
    /// for manual inspection of what LinkedIn returns.
    #[test]
    #[ignore = "Debug test to inspect LinkedIn response"]
    fn debug_linkedin_response() {
        let http = reqwest::blocking::Client::builder()
            .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
            .timeout(std::time::Duration::from_secs(10))
            .build()
            .unwrap();

        let reply = http.get(INVALID_PROFILE).send().unwrap();

        println!("Status: {}", reply.status());
        println!("Final URL: {}", reply.url());

        let page = reply.text().unwrap();
        println!("Body length: {}", page.len());
        println!(
            "First 2000 chars:\n{}",
            page.chars().take(2000).collect::<String>()
        );
    }
}