use super::LinkedInProvider;
use super::helpers::{
extract_csrf_token, extract_username_from_url, strip_html, strip_html_comment,
};
use super::oembed::format_oembed_markdown;
use super::types::LinkedInOEmbed;
use super::url::{LinkedInUrlKind, classify_linkedin_url};
use crate::site::SiteProvider;
/// `/in/<user>` URLs classify as `Profile`, with or without the `www.` host prefix.
#[test]
fn classifies_profile_urls() {
    let profile_urls = [
        "https://www.linkedin.com/in/mikko-parkkola/",
        "https://linkedin.com/in/someuser",
    ];
    for url in profile_urls {
        assert_eq!(classify_linkedin_url(url), Some(LinkedInUrlKind::Profile));
    }
}
/// A `/company/<slug>/` URL classifies as `Company`.
#[test]
fn classifies_company_urls() {
    let kind = classify_linkedin_url("https://www.linkedin.com/company/anthropic/");
    assert_eq!(kind, Some(LinkedInUrlKind::Company));
}
/// A `/posts/<slug>` URL classifies as `Post`.
#[test]
fn classifies_post_urls() {
    let url = "https://www.linkedin.com/posts/someuser_topic-activity-123456789";
    assert_eq!(classify_linkedin_url(url), Some(LinkedInUrlKind::Post));
}
/// A `/pulse/<slug>` article URL classifies as `Pulse`.
#[test]
fn classifies_pulse_urls() {
    let url = "https://www.linkedin.com/pulse/some-article-title-author";
    assert_eq!(classify_linkedin_url(url), Some(LinkedInUrlKind::Pulse));
}
/// A `/feed/update/urn:li:activity:<id>` URL classifies as `FeedUpdate`.
#[test]
fn classifies_feed_update_urls() {
    let url = "https://www.linkedin.com/feed/update/urn:li:activity:7654321098765432109";
    assert_eq!(
        classify_linkedin_url(url),
        Some(LinkedInUrlKind::FeedUpdate)
    );
}
/// A profile URL with a `/recent-activity/all/` sub-path classifies as `Activity`.
#[test]
fn classifies_activity_urls() {
    let url = "https://www.linkedin.com/in/mikko-parkkola/recent-activity/all/";
    assert_eq!(classify_linkedin_url(url), Some(LinkedInUrlKind::Activity));
}
/// A trailing query string does not change the classification of a profile URL.
#[test]
fn handles_query_params() {
    let url = "https://www.linkedin.com/in/user?utm_source=share";
    assert_eq!(classify_linkedin_url(url), Some(LinkedInUrlKind::Profile));
}
/// URLs on other hosts yield no classification.
#[test]
fn rejects_non_linkedin_urls() {
    let foreign_urls = [
        "https://youtube.com/watch?v=abc",
        "https://twitter.com/user/status/123",
    ];
    for url in foreign_urls {
        assert_eq!(classify_linkedin_url(url), None);
    }
}
/// The site root and an `/in/` path with no username yield no classification.
#[test]
fn rejects_bare_linkedin() {
    for url in ["https://www.linkedin.com/", "https://www.linkedin.com/in/"] {
        assert_eq!(classify_linkedin_url(url), None);
    }
}
/// `LinkedInProvider::matches` accepts one sample URL for each of the six URL shapes below.
#[test]
fn matches_all_linkedin_url_kinds() {
    let provider = LinkedInProvider;
    let supported = [
        "https://www.linkedin.com/in/someuser",
        "https://www.linkedin.com/company/somecompany",
        "https://www.linkedin.com/posts/user_title-123",
        "https://www.linkedin.com/pulse/article-title",
        "https://www.linkedin.com/feed/update/urn:li:activity:123",
        "https://www.linkedin.com/in/user/recent-activity/all/",
    ];
    for url in supported {
        assert!(provider.matches(url));
    }
}
/// `LinkedInProvider::matches` rejects URLs on other hosts.
#[test]
fn does_not_match_non_linkedin() {
    let provider = LinkedInProvider;
    let foreign_urls = [
        "https://youtube.com/watch?v=abc",
        "https://twitter.com/user/status/123",
    ];
    for url in foreign_urls {
        assert!(!provider.matches(url));
    }
}
/// `Profile`, `Company` and `Activity` require auth; `Post` and `Pulse` do not.
#[test]
fn auth_required_kinds() {
    let gated = [
        LinkedInUrlKind::Profile,
        LinkedInUrlKind::Company,
        LinkedInUrlKind::Activity,
    ];
    for kind in gated {
        assert!(kind.requires_auth());
    }

    let open = [LinkedInUrlKind::Post, LinkedInUrlKind::Pulse];
    for kind in open {
        assert!(!kind.requires_auth());
    }
}
/// `Post`, `Pulse` and `FeedUpdate` report an oEmbed fallback; `Profile` does not.
#[test]
fn oembed_fallback_kinds() {
    let with_fallback = [
        LinkedInUrlKind::Post,
        LinkedInUrlKind::Pulse,
        LinkedInUrlKind::FeedUpdate,
    ];
    for kind in with_fallback {
        assert!(kind.has_oembed_fallback());
    }
    assert!(!LinkedInUrlKind::Profile.has_oembed_fallback());
}
/// Nested tags are removed and the text between them is kept.
#[test]
fn strip_html_removes_tags() {
    let stripped = strip_html("<p>Hello <b>world</b></p>");
    assert_eq!(stripped, "Hello world");
}
/// The named entities `&amp;`, `&lt;` and `&gt;` are decoded to `&`, `<` and `>`.
#[test]
fn strip_html_decodes_entities() {
    // The input must contain real entities; the previous literal `"& < >"` had
    // none, so the test never exercised decoding.
    assert_eq!(strip_html("&amp; &lt; &gt;"), "& < >");
}
/// With every oEmbed field populated, the markdown contains the title heading,
/// the author byline, an image for the thumbnail, the stripped HTML body and
/// the "View on LinkedIn" link.
#[test]
fn format_oembed_with_full_data() {
    let oembed = LinkedInOEmbed {
        title: Some("The Future of Rust".to_string()),
        author_name: Some("Jane Engineer".to_string()),
        author_url: Some("https://www.linkedin.com/in/janeengineer".to_string()),
        thumbnail_url: Some("https://media.linkedin.com/thumb.jpg".to_string()),
        html: Some("<p>Great insights on systems programming.</p>".to_string()),
    };
    let url = "https://www.linkedin.com/posts/janeengineer_rust-123";
    let md = format_oembed_markdown(&oembed, url);
    assert!(md.contains("## The Future of Rust"));
    assert!(md.contains("by Jane Engineer"));
    // Previously `md.contains("")`, which is always true. Assert on the image
    // marker (the minimal-data test checks that `![` is absent) and on the
    // thumbnail URL, without pinning the alt text.
    assert!(md.contains("!["));
    assert!(md.contains("https://media.linkedin.com/thumb.jpg"));
    assert!(md.contains("Great insights on systems programming."));
    assert!(md.contains("[View on LinkedIn]"));
}
/// With only an author name set, the markdown has the byline and the
/// "View on LinkedIn" link but no `##` heading and no `![` image marker.
#[test]
fn format_oembed_with_minimal_data() {
    let post_url = "https://www.linkedin.com/posts/johndoe_post-456";
    let sparse = LinkedInOEmbed {
        title: None,
        author_name: Some(String::from("John Doe")),
        author_url: None,
        thumbnail_url: None,
        html: None,
    };

    let rendered = format_oembed_markdown(&sparse, post_url);

    for present in ["by John Doe", "[View on LinkedIn]"] {
        assert!(rendered.contains(present));
    }
    for absent in ["##", "!["] {
        assert!(!rendered.contains(absent));
    }
}
/// A double-quoted `JSESSIONID` value is returned without the surrounding quotes.
#[test]
fn extract_csrf_token_with_quotes() {
    let cookie_header = r#"li_at=AQEDARabcd; JSESSIONID="ajax:1234567890""#;
    assert_eq!(
        extract_csrf_token(cookie_header),
        Some(String::from("ajax:1234567890"))
    );
}
/// An unquoted `JSESSIONID` value is returned as-is.
#[test]
fn extract_csrf_token_without_quotes() {
    let cookie_header = "li_at=AQEDARabcd; JSESSIONID=ajax:9876543210";
    assert_eq!(
        extract_csrf_token(cookie_header),
        Some(String::from("ajax:9876543210"))
    );
}
/// A cookie string with no `JSESSIONID` entry yields `None`.
#[test]
fn extract_csrf_token_missing_jsessionid() {
    let token = extract_csrf_token("li_at=AQEDARabcd; lang=en");
    assert_eq!(token, None);
}
/// The mixed-case key `Jsessionid` is still recognised.
#[test]
fn extract_csrf_token_case_insensitive_key() {
    let token = extract_csrf_token(r#"Jsessionid="ajax:5555""#);
    assert_eq!(token, Some(String::from("ajax:5555")));
}
/// The username segment is extracted from a plain profile URL with a trailing slash.
#[test]
fn extract_username_simple_profile_url() {
    let extracted = extract_username_from_url("https://www.linkedin.com/in/mikko-parkkola/");
    assert_eq!(extracted, Some(String::from("mikko-parkkola")));
}
/// A query string after the username is not included in the result.
#[test]
fn extract_username_strips_query_string() {
    let extracted =
        extract_username_from_url("https://www.linkedin.com/in/someuser?utm_source=share");
    assert_eq!(extracted, Some(String::from("someuser")));
}
/// Path segments after the username (here `recent-activity/all/`) are ignored.
#[test]
fn extract_username_activity_subpath() {
    let extracted =
        extract_username_from_url("https://www.linkedin.com/in/johndoe/recent-activity/all/");
    assert_eq!(extracted, Some(String::from("johndoe")));
}
/// A company URL has no username, so the result is `None`.
#[test]
fn extract_username_non_profile_url() {
    let extracted = extract_username_from_url("https://www.linkedin.com/company/anthropic/");
    assert_eq!(extracted, None);
}
/// A profile response with every field set renders the name heading, headline,
/// location, industry and an "About" section containing the summary.
#[cfg(feature = "impersonate")]
#[test]
fn parse_voyager_profile_full_response() {
    use super::helpers::parse_voyager_profile;
    use super::types::VoyagerProfileResponse;

    let json = r#"{
"firstName": "Jane",
"lastName": "Engineer",
"headline": "Staff Engineer at Acme Corp",
"summary": "Passionate about distributed systems and Rust.",
"industryName": "Computer Software",
"geoLocationName": "San Francisco, California"
}"#;
    let parsed = serde_json::from_str::<VoyagerProfileResponse>(json).unwrap();
    let rendered = parse_voyager_profile(&parsed);

    let expected_fragments = [
        "## Jane Engineer",
        "Staff Engineer at Acme Corp",
        "Location: San Francisco, California",
        "Industry: Computer Software",
        "### About",
        "Passionate about distributed systems and Rust.",
    ];
    for fragment in expected_fragments {
        assert!(rendered.contains(fragment));
    }
}
/// With only `firstName` present, the heading is rendered and the industry
/// line and "About" section are left out.
#[cfg(feature = "impersonate")]
#[test]
fn parse_voyager_profile_minimal_response() {
    use super::helpers::parse_voyager_profile;
    use super::types::VoyagerProfileResponse;

    let parsed =
        serde_json::from_str::<VoyagerProfileResponse>(r#"{"firstName": "Jane"}"#).unwrap();
    let rendered = parse_voyager_profile(&parsed);

    assert!(rendered.contains("## Jane"));
    for absent in ["Industry:", "### About"] {
        assert!(!rendered.contains(absent));
    }
}
/// A whitespace-only `summary` does not produce an "About" section.
#[cfg(feature = "impersonate")]
#[test]
fn parse_voyager_profile_empty_summary_omitted() {
    use super::helpers::parse_voyager_profile;
    use super::types::VoyagerProfileResponse;

    let parsed =
        serde_json::from_str::<VoyagerProfileResponse>(r#"{"firstName": "Bob", "summary": " "}"#)
            .unwrap();
    let rendered = parse_voyager_profile(&parsed);

    assert!(!rendered.contains("### About"));
}
/// Two feed elements with commentary both appear in the output, and the
/// output contains exactly two `---` occurrences.
#[cfg(feature = "impersonate")]
#[test]
fn parse_voyager_activity_with_posts() {
    use super::helpers::parse_voyager_activity;
    use super::types::VoyagerActivityResponse;

    let json = r#"{
"elements": [
{
"value": {
"commentary": {
"text": { "text": "First post content here." }
}
}
},
{
"value": {
"commentary": {
"text": { "text": "Second post content here." }
}
}
}
]
}"#;
    let feed = serde_json::from_str::<VoyagerActivityResponse>(json).unwrap();
    let rendered = parse_voyager_activity(&feed);

    for post in ["First post content here.", "Second post content here."] {
        assert!(rendered.contains(post));
    }
    let separators = rendered.matches("---").count();
    assert_eq!(separators, 2);
}
/// An element whose `value` is `null` is skipped: only the real post is
/// rendered and the output contains a single `---` occurrence.
#[cfg(feature = "impersonate")]
#[test]
fn parse_voyager_activity_skips_elements_without_commentary() {
    use super::helpers::parse_voyager_activity;
    use super::types::VoyagerActivityResponse;

    let json = r#"{
"elements": [
{ "value": null },
{
"value": {
"commentary": {
"text": { "text": "Real post text." }
}
}
}
]
}"#;
    let feed = serde_json::from_str::<VoyagerActivityResponse>(json).unwrap();
    let rendered = parse_voyager_activity(&feed);

    assert!(rendered.contains("Real post text."));
    let separators = rendered.matches("---").count();
    assert_eq!(separators, 1);
}
/// An empty `elements` array renders to nothing but (at most) whitespace.
#[cfg(feature = "impersonate")]
#[test]
fn parse_voyager_activity_empty_feed() {
    use super::helpers::parse_voyager_activity;
    use super::types::VoyagerActivityResponse;

    let feed = serde_json::from_str::<VoyagerActivityResponse>(r#"{"elements": []}"#).unwrap();
    let rendered = parse_voyager_activity(&feed);

    assert!(rendered.trim().is_empty());
}
/// A JSON-LD `Person` script block yields a name heading and description, and
/// the metadata platform is "LinkedIn (Profile)".
#[cfg(feature = "impersonate")]
#[test]
fn parses_json_ld_profile() {
    use super::auth::parse_linkedin_html;

    let page = r#"
<html>
<head>
<script type="application/ld+json">
{
"@type": "Person",
"name": "Mikko Parkkola",
"description": "Building things with Rust and AI",
"image": "https://media.linkedin.com/photo.jpg"
}
</script>
</head>
<body></body>
</html>
"#;
    let profile_url = "https://linkedin.com/in/mikko";
    let parsed = parse_linkedin_html(page, profile_url, LinkedInUrlKind::Profile).unwrap();

    let md = &parsed.markdown;
    assert!(md.contains("## Mikko Parkkola"));
    assert!(md.contains("Building things with Rust and AI"));
    assert_eq!(parsed.metadata.platform, "LinkedIn (Profile)");
}
/// Without JSON-LD, the `<h1>` name and the `text-body-medium` headline still
/// end up in the markdown.
#[cfg(feature = "impersonate")]
#[test]
fn falls_back_to_selectors() {
    use super::auth::parse_linkedin_html;

    let page = r#"
<html>
<head>
<title>Mikko Parkkola | LinkedIn</title>
<meta property="og:description" content="Rust developer and AI enthusiast">
<meta property="og:image" content="https://media.linkedin.com/photo.jpg">
</head>
<body>
<h1>Mikko Parkkola</h1>
<div class="text-body-medium">Senior Engineer at Some Company</div>
</body>
</html>
"#;
    let profile_url = "https://linkedin.com/in/mikko";
    let parsed = parse_linkedin_html(page, profile_url, LinkedInUrlKind::Profile).unwrap();

    let md = &parsed.markdown;
    for fragment in ["## Mikko Parkkola", "Senior Engineer at Some Company"] {
        assert!(md.contains(fragment));
    }
}
/// When the page offers only an `og:description` meta tag, its content is
/// what the markdown contains.
#[cfg(feature = "impersonate")]
#[test]
fn og_description_fallback() {
    use super::auth::parse_linkedin_html;

    let page = r#"
<html>
<head>
<meta property="og:description" content="This is the only content available">
</head>
<body></body>
</html>
"#;
    let profile_url = "https://linkedin.com/in/user";
    let parsed = parse_linkedin_html(page, profile_url, LinkedInUrlKind::Profile).unwrap();

    let md = &parsed.markdown;
    assert!(md.contains("This is the only content available"));
}
/// The `<!--` / `-->` wrapper is removed, leaving the JSON payload.
#[test]
fn strip_comment_removes_html_comment_wrapper() {
    let unwrapped = strip_html_comment(r#"<!--{"firstName":"Jane"}-->"#);
    assert_eq!(unwrapped, r#"{"firstName":"Jane"}"#);
}
/// Whitespace between the comment markers and the payload is trimmed away.
#[test]
fn strip_comment_trims_whitespace_inside_comment() {
    let unwrapped = strip_html_comment("<!-- {\"key\":\"value\"} -->");
    assert_eq!(unwrapped, r#"{"key":"value"}"#);
}
/// Input that is not wrapped in a comment is returned unchanged.
#[test]
fn strip_comment_passthrough_when_no_comment_wrapper() {
    let raw = r#"{"firstName":"Jane"}"#;
    assert_eq!(strip_html_comment(raw), raw);
}
/// An empty input stays empty.
#[test]
fn strip_comment_passthrough_empty_string() {
    let unwrapped = strip_html_comment("");
    assert_eq!(unwrapped, "");
}
/// Text nested as `commentary.text.text` is extracted.
#[cfg(feature = "impersonate")]
#[test]
fn extract_post_text_voyager_nested_shape() {
    use super::auth::extract_post_text;

    let payload = serde_json::json!({
        "commentary": { "text": { "text": "Voyager nested post text" } }
    });
    let map = payload.as_object().unwrap();

    let extracted = extract_post_text(map);
    assert_eq!(extracted.as_deref(), Some("Voyager nested post text"));
}
/// Text stored one level down, as `commentary.text`, is extracted.
#[cfg(feature = "impersonate")]
#[test]
fn extract_post_text_flat_commentary_text() {
    use super::auth::extract_post_text;

    let payload = serde_json::json!({
        "commentary": { "text": "Flat commentary text" }
    });
    let map = payload.as_object().unwrap();

    let extracted = extract_post_text(map);
    assert_eq!(extracted.as_deref(), Some("Flat commentary text"));
}
/// A `commentary` value that is itself a string is returned directly.
#[cfg(feature = "impersonate")]
#[test]
fn extract_post_text_string_commentary() {
    use super::auth::extract_post_text;

    let payload = serde_json::json!({ "commentary": "Direct string post" });
    let map = payload.as_object().unwrap();

    let extracted = extract_post_text(map);
    assert_eq!(extracted.as_deref(), Some("Direct string post"));
}
/// An object without a `commentary` key yields `None`.
#[cfg(feature = "impersonate")]
#[test]
fn extract_post_text_returns_none_when_absent() {
    use super::auth::extract_post_text;

    let payload = serde_json::json!({ "firstName": "Jane" });
    let map = payload.as_object().unwrap();

    assert!(extract_post_text(map).is_none());
}
/// A whitespace-only `commentary` string yields `None`.
#[cfg(feature = "impersonate")]
#[test]
fn extract_post_text_skips_blank_commentary() {
    use super::auth::extract_post_text;

    let payload = serde_json::json!({ "commentary": " " });
    let map = payload.as_object().unwrap();

    assert!(extract_post_text(map).is_none());
}
/// An object carrying both `firstName` and `headline` is accepted as a profile.
#[cfg(feature = "impersonate")]
#[test]
fn looks_like_profile_with_two_profile_keys() {
    use super::auth::looks_like_profile;

    let raw = r#"{"firstName":"Jane","headline":"Staff Engineer"}"#;
    let candidate =
        serde_json::from_str::<serde_json::Map<String, serde_json::Value>>(raw).unwrap();

    assert!(looks_like_profile(&candidate));
}
/// `firstName` alongside an unrelated key is not enough to count as a profile.
#[cfg(feature = "impersonate")]
#[test]
fn looks_like_profile_rejects_single_profile_key() {
    use super::auth::looks_like_profile;

    let raw = r#"{"firstName":"Jane","unrelated":"data"}"#;
    let candidate =
        serde_json::from_str::<serde_json::Map<String, serde_json::Value>>(raw).unwrap();

    assert!(!looks_like_profile(&candidate));
}
/// An object with no profile-related keys is rejected.
#[cfg(feature = "impersonate")]
#[test]
fn looks_like_profile_rejects_non_profile_object() {
    use super::auth::looks_like_profile;

    let raw = r#"{"color":"blue","size":42}"#;
    let candidate =
        serde_json::from_str::<serde_json::Map<String, serde_json::Value>>(raw).unwrap();

    assert!(!looks_like_profile(&candidate));
}
/// Profile JSON embedded in hidden `<code>` tags is rendered with name,
/// headline, location, industry and summary, and the metadata platform is
/// "LinkedIn (Profile)".
#[cfg(feature = "impersonate")]
#[test]
fn code_tag_extraction_profile_data() {
    use super::auth::parse_linkedin_html;

    let profile_json = r#"{"firstName":"Jane","lastName":"Engineer","headline":"Staff Engineer at Acme","summary":"Building systems in Rust.","geoLocationName":"Helsinki, Finland","industryName":"Computer Software"}"#;
    // NOTE(review): the first <code> payload is wrapped in em dashes
    // (`<!—…—>`) rather than `<!--…-->`; presumably deliberate, or a
    // typographic substitution — confirm. Kept byte-for-byte here.
    let page = format!(
        r#"<!DOCTYPE html><html><head></head><body>
<code style="display:none" id="bpr-guid-1"><!—{profile_json}—></code>
<code style="display:none" id="bpr-guid-2"><!--{profile_json}--></code>
</body></html>"#
    );
    let parsed = parse_linkedin_html(
        &page,
        "https://linkedin.com/in/janeengineer",
        LinkedInUrlKind::Profile,
    )
    .unwrap();

    let md = &parsed.markdown;
    // Each pair is (expected fragment, failure label).
    let checks = [
        ("## Jane Engineer", "Missing name"),
        ("Staff Engineer at Acme", "Missing headline"),
        ("Helsinki, Finland", "Missing location"),
        ("Computer Software", "Missing industry"),
        ("Building systems in Rust.", "Missing summary"),
    ];
    for (fragment, label) in checks {
        assert!(md.contains(fragment), "{}: {}", label, md);
    }
    assert_eq!(parsed.metadata.platform, "LinkedIn (Profile)");
}
/// Post commentary embedded in a hidden `<code>` tag shows up in the markdown.
#[cfg(feature = "impersonate")]
#[test]
fn code_tag_extraction_post_commentary() {
    use super::auth::parse_linkedin_html;

    let post_json = r#"{"commentary":{"text":{"text":"Just shipped Rust HTTP/3 client."}},"actor":{"name":"Jane Engineer"}}"#;
    let page = format!(
        r#"<!DOCTYPE html><html><head></head><body>
<code style="display:none" id="bpr-guid-1"><!--{post_json}--></code>
</body></html>"#
    );
    let parsed = parse_linkedin_html(
        &page,
        "https://linkedin.com/posts/jane_rust-123",
        LinkedInUrlKind::Post,
    )
    .unwrap();

    let md = &parsed.markdown;
    assert!(
        md.contains("Just shipped Rust HTTP/3 client."),
        "Missing post text: {}",
        md
    );
}
/// The same post JSON in two `<code>` tags is rendered only once.
#[cfg(feature = "impersonate")]
#[test]
fn code_tag_extraction_deduplicates_posts() {
    use super::auth::parse_linkedin_html;

    let post_json = r#"{"commentary":"Unique post text."}"#;
    let page = format!(
        r"<!DOCTYPE html><html><head></head><body>
<code><!--{post_json}--></code>
<code><!--{post_json}--></code>
</body></html>"
    );
    let parsed = parse_linkedin_html(
        &page,
        "https://linkedin.com/posts/user_post-123",
        LinkedInUrlKind::Post,
    )
    .unwrap();

    let md = &parsed.markdown;
    let occurrences = md.matches("Unique post text.").count();
    assert_eq!(
        occurrences, 1,
        "Expected 1 occurrence but got more: {}",
        md
    );
}
/// When the `<code>` tag holds nothing useful, the JSON-LD `Person` block
/// supplies the name heading and description.
#[cfg(feature = "impersonate")]
#[test]
fn code_tag_extraction_falls_through_to_json_ld_when_empty() {
    use super::auth::parse_linkedin_html;

    let page = r#"<!DOCTYPE html><html><head>
<script type="application/ld+json">
{"@type":"Person","name":"Fallback Person","description":"JSON-LD description"}
</script>
</head><body>
<code><!--{"irrelevant":"noise"}--></code>
</body></html>"#;
    let parsed = parse_linkedin_html(
        page,
        "https://linkedin.com/in/fallback",
        LinkedInUrlKind::Profile,
    )
    .unwrap();

    let md = &parsed.markdown;
    // Each pair is (expected fragment, failure label).
    let checks = [
        ("## Fallback Person", "Expected JSON-LD fallback"),
        ("JSON-LD description", "Expected JSON-LD desc"),
    ];
    for (fragment, label) in checks {
        assert!(md.contains(fragment), "{}: {}", label, md);
    }
}
/// Unparseable `<code>` payloads do not abort parsing; the `og:description`
/// content still reaches the markdown.
#[cfg(feature = "impersonate")]
#[test]
fn code_tag_extraction_handles_malformed_json_gracefully() {
    use super::auth::parse_linkedin_html;

    let page = r#"<!DOCTYPE html><html><head>
<meta property="og:description" content="og fallback works">
</head><body>
<code><!--{broken json--></code>
<code><!--not json at all--></code>
</body></html>"#;
    let parsed = parse_linkedin_html(
        page,
        "https://linkedin.com/in/user",
        LinkedInUrlKind::Profile,
    )
    .unwrap();

    let md = &parsed.markdown;
    assert!(
        md.contains("og fallback works"),
        "Expected og fallback: {}",
        md
    );
}
/// A profile object nested under `data.profile` in the `<code>` payload is
/// still found and rendered.
#[cfg(feature = "impersonate")]
#[test]
fn code_tag_extraction_nested_profile_in_object() {
    use super::auth::parse_linkedin_html;

    let page = r#"<!DOCTYPE html><html><head></head><body>
<code><!--{"data":{"profile":{"firstName":"Nested","lastName":"Profile","headline":"CTO at Example","summary":"Led engineering teams for a decade."}}}--></code>
</body></html>"#;
    let parsed = parse_linkedin_html(
        page,
        "https://linkedin.com/in/nested",
        LinkedInUrlKind::Profile,
    )
    .unwrap();

    let md = &parsed.markdown;
    // Each pair is (expected fragment, failure label).
    let checks = [
        ("## Nested Profile", "Missing nested profile"),
        ("CTO at Example", "Missing headline"),
    ];
    for (fragment, label) in checks {
        assert!(md.contains(fragment), "{}: {}", label, md);
    }
}
/// A `<code>` profile with no industry field renders the name and headline
/// and emits no "Industry:" line.
#[cfg(feature = "impersonate")]
#[test]
fn code_tag_profile_without_industry_still_renders() {
    use super::auth::parse_linkedin_html;

    let page = r#"<!DOCTYPE html><html><head></head><body>
<code><!--{"firstName":"Alice","lastName":"Smith","headline":"Engineer"}--></code>
</body></html>"#;
    let parsed = parse_linkedin_html(
        page,
        "https://linkedin.com/in/alice",
        LinkedInUrlKind::Profile,
    )
    .unwrap();

    let md = &parsed.markdown;
    for fragment in ["## Alice Smith", "Engineer"] {
        assert!(md.contains(fragment));
    }
    assert!(!md.contains("Industry:"));
}