use std::fs;
use std::path::{
Path,
PathBuf,
};
use std::time::{
SystemTime,
UNIX_EPOCH,
};
use regex::Regex;
use reqwest::StatusCode;
use reqwest::header::{
COOKIE,
HeaderMap,
HeaderValue,
USER_AGENT,
};
use rustybook_extractor::html::extract_user_name;
use rustybook_http::client::{
Client as HttpClient,
Request as HttpRequest,
};
use serde_json::Value;
use tracing::{
debug,
trace,
warn,
};
use uuid::Uuid;
use crate::auth::load_cookies;
use crate::error::MessengerError;
use crate::graphql::strip_json_guard;
/// User-Agent string used by the `State` constructors when the caller passes `None`.
///
/// The value identifies as desktop Chrome 137 on 64-bit Windows 10.
const DEFAULT_USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36";
/// Session state gathered while bootstrapping a Messenger connection.
///
/// Instances are created by `State::from_cookies_file` or `State::from_shared`,
/// both of which run the HTML bootstrap before returning.
#[derive(Debug, Clone)]
pub struct State {
/// Account id; sent as the `__user` form field by `fetch_sequence_id`.
pub user_id: String,
/// Display name scraped from the bootstrap HTML; `None` when no page yielded one.
pub user_name: Option<String>,
/// Value sent in the `User-Agent` header of every request built from `base_headers`.
pub user_agent: String,
/// Raw value sent in the `Cookie` header of every request built from `base_headers`.
pub cookie_header: String,
/// `fb_dtsg` token scraped from the bootstrap HTML, if found.
pub fb_dtsg: Option<String>,
/// `lsd` token scraped from the bootstrap HTML; also sent as the `x-fb-lsd` header.
pub lsd: Option<String>,
/// `jazoest` value scraped from the HTML, or derived from `fb_dtsg` when absent.
pub jazoest: Option<String>,
/// Client revision scraped from the HTML; sent as the `__rev` form field when present.
pub client_revision: Option<u64>,
/// Random UUID v4 generated when the state is constructed.
pub mqtt_client_id: String,
/// Hard-coded identifier set at construction; consumers are outside this file — TODO confirm usage.
pub mqtt_app_id: String,
/// Hard-coded identifier set at construction; consumers are outside this file — TODO confirm usage.
pub ls_app_id: String,
/// Hard-coded identifier set at construction; consumers are outside this file — TODO confirm usage.
pub ls_version_id: String,
/// Region code; starts as `"prn"` and is replaced by a value scraped from the HTML when one is found.
pub region: String,
/// Initial sync sequence id fetched during bootstrap; `None` when that fetch failed.
pub sequence_id: Option<u64>,
/// Directory into which every HTTP exchange is dumped as text files.
pub http_dump_dir: PathBuf,
/// HTTP client used for all requests issued by this state.
pub(crate) http: HttpClient,
}
impl State {
pub async fn from_cookies_file(
path: &str,
user_agent: Option<&str>,
proxy: Option<&str>,
) -> Result<Self, MessengerError> {
let auth = load_cookies(path)?;
let user_agent = user_agent.unwrap_or(DEFAULT_USER_AGENT).to_string();
let http = build_http_client(&auth.cookie_header, &user_agent, proxy)?;
let mut state = State {
user_id: auth.user_id.clone(),
user_name: None,
user_agent,
cookie_header: auth.cookie_header,
fb_dtsg: None,
lsd: None,
jazoest: None,
client_revision: None,
mqtt_client_id: Uuid::new_v4().to_string(),
mqtt_app_id: "219994525426954".to_string(),
ls_app_id: "2220391788200892".to_string(),
ls_version_id: "26461632200121827".to_string(),
region: "prn".to_string(),
sequence_id: None,
http_dump_dir: build_http_dump_dir()?,
http,
};
debug!(
path = state.http_dump_dir.display().to_string(),
"http dump directory initialized"
);
state.bootstrap().await?;
Ok(state)
}
pub async fn from_shared(
user_id: String,
cookie_header: String,
http: HttpClient,
user_agent: Option<&str>,
) -> Result<Self, MessengerError> {
let user_agent = user_agent.unwrap_or(DEFAULT_USER_AGENT).to_string();
let mut state = State {
user_id,
user_name: None,
user_agent,
cookie_header,
fb_dtsg: None,
lsd: None,
jazoest: None,
client_revision: None,
mqtt_client_id: Uuid::new_v4().to_string(),
mqtt_app_id: "219994525426954".to_string(),
ls_app_id: "2220391788200892".to_string(),
ls_version_id: "26461632200121827".to_string(),
region: "prn".to_string(),
sequence_id: None,
http_dump_dir: build_http_dump_dir()?,
http,
};
debug!(
path = state.http_dump_dir.display().to_string(),
"http dump directory initialized"
);
state.bootstrap().await?;
Ok(state)
}
/// Fetches the current sync sequence id through the GraphQL batch endpoint.
///
/// Posts a single `MessengerGraphQLThreadlistFetcher` query (inbox, limit 1,
/// `includeSeqID: true`) as a URL-encoded form. The optional `__rev`,
/// `fb_dtsg`, `jazoest` and `lsd` fields are included only when the
/// corresponding value is present on the state.
///
/// # Errors
///
/// Returns an error when the headers cannot be built, the request fails or
/// returns a non-success status, the body is empty, or no `sync_sequence_id`
/// is found in the response.
pub async fn fetch_sequence_id(&self) -> Result<u64, MessengerError> {
    // Single source of truth for the endpoint so the request and its dump
    // record cannot drift apart.
    const GRAPHQL_BATCH_URL: &str = "https://www.facebook.com/api/graphqlbatch/";

    let queries = serde_json::json!({
        "q0": {
            "doc_id": "1349387578499440",
            "query_params": {
                "limit": 1,
                "tags": ["INBOX"],
                "before": serde_json::Value::Null,
                "includeDeliveryReceipts": false,
                "includeSeqID": true
            }
        }
    })
    .to_string();

    let mut form_fields = vec![
        ("method", "GET".to_string()),
        ("response_format", "json".to_string()),
        (
            "batch_name",
            "MessengerGraphQLThreadlistFetcher".to_string(),
        ),
        ("queries", queries),
        ("__user", self.user_id.clone()),
        ("__a", "1".to_string()),
        ("__req", "1".to_string()),
    ];
    if let Some(revision) = self.client_revision {
        form_fields.push(("__rev", revision.to_string()));
    }
    if let Some(fb_dtsg) = &self.fb_dtsg {
        form_fields.push(("fb_dtsg", fb_dtsg.clone()));
    }
    if let Some(jazoest) = &self.jazoest {
        form_fields.push(("jazoest", jazoest.clone()));
    }
    if let Some(lsd) = &self.lsd {
        form_fields.push(("lsd", lsd.clone()));
    }
    let body = encode_form_fields(&form_fields);

    // Start from the shared headers and override the ones this call differs on.
    let mut headers = self.base_headers()?;
    headers.insert(reqwest::header::ACCEPT, HeaderValue::from_static("*/*"));
    headers.insert(
        reqwest::header::HeaderName::from_static("origin"),
        HeaderValue::from_static("https://www.facebook.com"),
    );
    headers.insert(
        reqwest::header::REFERER,
        HeaderValue::from_static("https://www.facebook.com/"),
    );
    headers.insert(
        reqwest::header::CONTENT_TYPE,
        HeaderValue::from_static("application/x-www-form-urlencoded"),
    );

    // Mirror of the overrides above, recorded in the on-disk dump.
    let mut dump_headers = build_dump_headers(&[
        ("accept", "*/*"),
        ("origin", "https://www.facebook.com"),
        ("referer", "https://www.facebook.com/"),
        ("content-type", "application/x-www-form-urlencoded"),
    ]);

    // The request needs its own copy of the body; the dump context below takes
    // ownership of the original.
    let mut request = HttpRequest::post(GRAPHQL_BATCH_URL)
        .headers(headers)
        .body(body.clone());
    if let Some(lsd) = &self.lsd {
        let lsd_header = HeaderValue::from_str(lsd)
            .map_err(|error| MessengerError::State(format!("invalid lsd header: {error}")))?;
        request = request.header(
            reqwest::header::HeaderName::from_static("x-fb-lsd"),
            lsd_header,
        );
        // Keep the dump faithful to what is actually sent.
        dump_headers.push(("x-fb-lsd".to_string(), lsd.clone()));
    }

    let text = self
        .send_with_dump(
            request,
            HttpDumpContext {
                label: "graphql_batch_sequence".to_string(),
                method: "POST".to_string(),
                url: GRAPHQL_BATCH_URL.to_string(),
                request_headers: dump_headers,
                request_body: Some(body),
            },
        )
        .await?;
    if text.trim().is_empty() {
        return Err(MessengerError::State(
            "empty graphql batch response".to_string(),
        ));
    }
    extract_sequence_id_from_graphql_response(&text)
}
/// Builds the header set shared by every request issued from this state.
///
/// The map carries the session `Cookie` and `User-Agent` plus a fixed set of
/// navigation-style headers (`Accept`, `Accept-Language`, `Accept-Encoding`,
/// `sec-fetch-*`, `Upgrade-Insecure-Requests`).
///
/// # Errors
///
/// Returns an error when the stored cookie header or user agent is not a
/// valid header value.
pub fn base_headers(&self) -> Result<HeaderMap, MessengerError> {
    // Validate the two dynamic values first, cookie before user agent.
    let cookie = match HeaderValue::from_str(&self.cookie_header) {
        Ok(value) => value,
        Err(error) => {
            return Err(MessengerError::State(format!(
                "invalid cookie header: {error}"
            )));
        }
    };
    let user_agent = match HeaderValue::from_str(&self.user_agent) {
        Ok(value) => value,
        Err(error) => {
            return Err(MessengerError::State(format!(
                "invalid user agent: {error}"
            )));
        }
    };

    let mut headers = HeaderMap::new();
    headers.insert(COOKIE, cookie);
    headers.insert(USER_AGENT, user_agent);

    let standard_headers = [
        (
            reqwest::header::ACCEPT,
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        ),
        (reqwest::header::ACCEPT_LANGUAGE, "en-US,en;q=0.9"),
        (reqwest::header::ACCEPT_ENCODING, "br, gzip, deflate"),
    ];
    for (name, value) in standard_headers {
        headers.insert(name, HeaderValue::from_static(value));
    }

    let fetch_metadata = [
        ("sec-fetch-dest", "document"),
        ("sec-fetch-mode", "navigate"),
        ("sec-fetch-site", "none"),
        ("sec-fetch-user", "?1"),
    ];
    for (name, value) in fetch_metadata {
        headers.insert(
            reqwest::header::HeaderName::from_static(name),
            HeaderValue::from_static(value),
        );
    }

    headers.insert(
        reqwest::header::UPGRADE_INSECURE_REQUESTS,
        HeaderValue::from_static("1"),
    );
    Ok(headers)
}
async fn bootstrap(&mut self) -> Result<(), MessengerError> {
let bootstrap_pages = [
("facebook_home", "https://www.facebook.com/"),
("messenger_home", "https://www.messenger.com/"),
("facebook_messages", "https://www.facebook.com/messages/t/"),
];
let mut html_pages = Vec::new();
for (label, url) in bootstrap_pages {
let request = HttpRequest::get(url).headers(self.base_headers()?);
let html = self
.send_with_dump(
request,
HttpDumpContext {
label: label.to_string(),
method: "GET".to_string(),
url: url.to_string(),
request_headers: Vec::new(),
request_body: None,
},
)
.await?;
if !html.trim().is_empty() {
html_pages.push(html);
}
}
for html in &html_pages {
if self.user_name.is_none() {
self.user_name = extract_user_name(html).map_err(|error| {
MessengerError::State(format!("failed to extract user name from html: {error}"))
})?;
}
if self.fb_dtsg.is_none() {
self.fb_dtsg = extract_fb_dtsg(html)?;
}
if self.lsd.is_none() {
self.lsd = extract_lsd(html)?;
}
if self.jazoest.is_none() {
self.jazoest = extract_jazoest(html)?;
}
if self.client_revision.is_none() {
self.client_revision = extract_client_revision(html);
}
if self.region == "prn"
&& let Some(region) = extract_region(html)
{
self.region = region;
}
}
if self.user_name.is_none() {
trace!("user name token not found in bootstrap html");
}
match self.fetch_sequence_id().await {
Ok(sequence_id) => {
self.sequence_id = Some(sequence_id);
}
Err(error) => {
warn!(?error, "failed to fetch initial sequence id");
self.sequence_id = None;
}
}
Ok(())
}
/// Sends `request`, logs the outcome, dumps the exchange to disk and returns the body.
///
/// * `request` - the request to execute on the state's HTTP client.
/// * `context` - label, method, URL and request details recorded in the log and dump.
///
/// # Errors
///
/// Returns an error when the transport fails, the dump cannot be written, or
/// the response status is not a success status.
///
/// NOTE(review): a dump write failure fails the whole request, and the warning
/// is keyed on `!= 200` while the error is keyed on `!is_success()` — confirm
/// both are intended.
async fn send_with_dump(
    &self,
    request: HttpRequest,
    context: HttpDumpContext,
) -> Result<String, MessengerError> {
    let response = match self.http.request(request).await {
        Ok(response) => response,
        Err(error) => {
            return Err(MessengerError::State(format!(
                "http request failed: {error}"
            )));
        }
    };
    let status = response.status;
    let status_code = status.as_u16();

    if status != StatusCode::OK {
        warn!(
            label = context.label,
            method = context.method,
            url = context.url,
            status = status_code,
            "http request returned non-200 status"
        );
    } else {
        debug!(
            label = context.label,
            method = context.method,
            url = context.url,
            status = status_code,
            "http request completed"
        );
    }

    // The exchange is recorded before the status check so failures are dumped too.
    self.dump_http_exchange(&context, status_code, &response.headers, &response.text)?;

    if status.is_success() {
        Ok(response.text)
    } else {
        Err(MessengerError::State(format!(
            "http request failed for {} {} with status {}",
            context.method, context.url, status_code
        )))
    }
}
/// Writes one HTTP exchange to three text files inside `http_dump_dir`.
///
/// The files share a `<label>_<uuid>` prefix and hold the request line,
/// headers and optional body (`.request.txt`), the response status and
/// headers (`.response_meta.txt`), and the response body
/// (`.response_body.txt`).
///
/// The request headers are reconstructed from `base_headers` plus the
/// per-request headers in `context`. A base header that the request overrode
/// is skipped, so the dump lists each header once with the overriding value
/// instead of showing a stale base value next to it. Header values that are
/// not valid visible ASCII are omitted.
///
/// NOTE(review): the dump includes the session `Cookie` header in plain text.
///
/// # Errors
///
/// Returns an error when the base headers cannot be built or any of the
/// files cannot be written.
fn dump_http_exchange(
    &self,
    context: &HttpDumpContext,
    status: u16,
    response_headers: &HeaderMap,
    response_body: &str,
) -> Result<(), MessengerError> {
    // Appends a single `name: value` line to a dump buffer.
    fn push_header_line(buffer: &mut String, name: &str, value: &str) {
        buffer.push_str(name);
        buffer.push_str(": ");
        buffer.push_str(value);
        buffer.push('\n');
    }

    let request_id = Uuid::new_v4().simple().to_string();
    let prefix = format!("{}_{}", context.label, request_id);

    let mut request_dump = String::new();
    request_dump.push_str(&context.method);
    request_dump.push(' ');
    request_dump.push_str(&context.url);
    request_dump.push('\n');
    request_dump.push_str("headers:\n");

    let base_headers = self.base_headers()?;
    for (name, value) in &base_headers {
        // Header names are case-insensitive; skip base entries the request replaced.
        let overridden = context
            .request_headers
            .iter()
            .any(|(explicit, _)| explicit.eq_ignore_ascii_case(name.as_str()));
        if overridden {
            continue;
        }
        if let Ok(value_str) = value.to_str() {
            push_header_line(&mut request_dump, name.as_str(), value_str);
        }
    }
    for (name, value) in &context.request_headers {
        push_header_line(&mut request_dump, name, value);
    }
    if let Some(body) = &context.request_body {
        request_dump.push_str("\nbody:\n");
        request_dump.push_str(body);
        request_dump.push('\n');
    }

    let mut response_meta = String::new();
    response_meta.push_str("status: ");
    response_meta.push_str(&status.to_string());
    response_meta.push('\n');
    response_meta.push_str("headers:\n");
    for (name, value) in response_headers {
        if let Ok(value_str) = value.to_str() {
            push_header_line(&mut response_meta, name.as_str(), value_str);
        }
    }

    let request_path = self.http_dump_dir.join(format!("{prefix}.request.txt"));
    let response_meta_path = self
        .http_dump_dir
        .join(format!("{prefix}.response_meta.txt"));
    let response_body_path = self
        .http_dump_dir
        .join(format!("{prefix}.response_body.txt"));
    fs::write(request_path, request_dump)?;
    fs::write(response_meta_path, response_meta)?;
    fs::write(response_body_path, response_body)?;
    Ok(())
}
}
/// Request-side details recorded in the log line and the on-disk dump of an HTTP exchange.
#[derive(Debug, Clone)]
struct HttpDumpContext {
// Short name used in log fields and as the prefix of the dump file names.
label: String,
// HTTP method written to the request line of the dump (e.g. "GET", "POST").
method: String,
// Request URL written to the request line of the dump.
url: String,
// Per-request headers written to the dump after the base headers.
request_headers: Vec<(String, String)>,
// Request body written to the dump when present.
request_body: Option<String>,
}
/// Converts borrowed `(name, value)` header pairs into owned pairs, preserving order.
fn build_dump_headers(headers: &[(&str, &str)]) -> Vec<(String, String)> {
    let mut owned = Vec::with_capacity(headers.len());
    for &(name, value) in headers {
        owned.push((name.to_owned(), value.to_owned()));
    }
    owned
}
fn build_http_client(
cookie_header: &str,
user_agent: &str,
proxy: Option<&str>,
) -> Result<HttpClient, MessengerError> {
let mut builder = rustybook_http::ClientBuilder::new()
.max_redirect(10)
.cookie_header(cookie_header.to_string())
.user_agent(user_agent.to_string());
if let Some(proxy_url) = proxy {
builder = builder.proxy(proxy_url.to_string());
}
builder
.build()
.map_err(|error| MessengerError::State(format!("failed to build http client: {error}")))
}
fn build_http_dump_dir() -> Result<PathBuf, MessengerError> {
let unix_ts = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|duration| duration.as_secs())
.unwrap_or(0);
let path = Path::new("storage").join(format!("messenger-http-debug-{unix_ts}"));
fs::create_dir_all(&path)?;
Ok(path)
}
/// Returns capture group 1 of the first pattern in `patterns` that matches `html`.
///
/// Patterns are tried in order; `Ok(None)` means none of them matched (or the
/// matching pattern had no group 1).
///
/// # Errors
///
/// Returns an error when a pattern fails to compile.
///
/// NOTE(review): every pattern is recompiled on each call.
fn extract_first(html: &str, patterns: &[&str]) -> Result<Option<String>, MessengerError> {
    for pattern in patterns {
        let compiled = Regex::new(pattern)?;
        let Some(captures) = compiled.captures(html) else {
            continue;
        };
        if let Some(group) = captures.get(1) {
            return Ok(Some(group.as_str().to_owned()));
        }
    }
    Ok(None)
}
/// Scrapes the `fb_dtsg` token from a bootstrap HTML page.
///
/// Several page layouts are probed in order; the first match wins and
/// `Ok(None)` is returned when none of them is present.
///
/// # Errors
///
/// Propagates pattern compilation errors from `extract_first`.
fn extract_fb_dtsg(html: &str) -> Result<Option<String>, MessengerError> {
    const FB_DTSG_PATTERNS: [&str; 4] = [
        r#""DTSGInitialData".*?"token":"([^"]+)""#,
        r#""DTSGInitData"(?:\s*,\s*\[\])?(?:\s*,\s*)\{[^}]*"token"\s*:\s*"([^"]+)""#,
        r#""fb_dtsg":\{"value":"([^"]+)""#,
        r#"name="fb_dtsg"\s+value="([^"]+)""#,
    ];
    extract_first(html, &FB_DTSG_PATTERNS)
}
/// Scrapes the `lsd` token from a bootstrap HTML page.
///
/// Three page layouts are probed in order; the first match wins and
/// `Ok(None)` is returned when none of them is present.
///
/// # Errors
///
/// Propagates pattern compilation errors from `extract_first`.
fn extract_lsd(html: &str) -> Result<Option<String>, MessengerError> {
    // A former second pattern differed from the first only by capturing
    // `[A-Za-z0-9_-]+` instead of `[^"]+`. Anything it could match is already
    // matched by the first pattern, so it could never be reached and was dropped.
    extract_first(
        html,
        &[
            r#""LSD"\s*,\s*\[\s*\]\s*,\s*\{\s*"token"\s*:\s*"([^"]+)""#,
            r#""lsd":\{"token":"([^"]+)""#,
            r#"name="lsd"\s+value="([^"]+)""#,
        ],
    )
}
/// Scrapes the `jazoest` value from a bootstrap HTML page, deriving it when absent.
///
/// When the page carries no explicit `jazoest`, the value is computed from the
/// page's `fb_dtsg` token as the digit `2` followed by the sum of the token's
/// byte values. `Ok(None)` is returned when neither is available.
///
/// # Errors
///
/// Propagates pattern compilation errors from `extract_first`.
fn extract_jazoest(html: &str) -> Result<Option<String>, MessengerError> {
    let scraped = extract_first(
        html,
        &[
            r#""jazoest":\{"value":"([^"]+)""#,
            r#"name="jazoest"\s+value="([^"]+)""#,
        ],
    )?;
    if scraped.is_some() {
        return Ok(scraped);
    }

    let derived = extract_fb_dtsg(html)?.map(|token| {
        let checksum: u32 = token.bytes().map(u32::from).sum();
        format!("2{checksum}")
    });
    Ok(derived)
}
/// Returns the first lowercase `"region":"..."` value found in `html`, if any.
fn extract_region(html: &str) -> Option<String> {
    let pattern = Regex::new("\"region\":\"([a-z]+)\"").ok()?;
    let captures = pattern.captures(html)?;
    let region = captures.get(1)?;
    Some(region.as_str().to_owned())
}
/// Scrapes the numeric client revision from a bootstrap HTML page.
///
/// `client_revision` is probed first, then `__spin_r`. A pattern whose digits
/// do not fit in a `u64` is treated as a miss and the next pattern is tried.
fn extract_client_revision(html: &str) -> Option<u64> {
    const REVISION_PATTERNS: [&str; 2] = [
        r#""client_revision"\s*:\s*(\d+)"?"#,
        r#""__spin_r"\s*:\s*(\d+)"?"#,
    ];
    for source in REVISION_PATTERNS {
        let matcher = Regex::new(source).ok()?;
        let parsed = matcher.captures(html).and_then(|captures| {
            captures
                .get(1)
                .and_then(|digits| digits.as_str().parse::<u64>().ok())
        });
        if parsed.is_some() {
            return parsed;
        }
    }
    None
}
/// Serialises `fields` as an `application/x-www-form-urlencoded` body.
///
/// Pairs are emitted in order as `key=value` joined by `&`. Values are
/// percent-encoded with `url_encode_component`; keys are written verbatim.
fn encode_form_fields(fields: &[(&str, String)]) -> String {
    fields
        .iter()
        .map(|(key, value)| format!("{key}={}", url_encode_component(value)))
        .collect::<Vec<_>>()
        .join("&")
}
/// Percent-encodes `value` for use as a URL or form component.
///
/// ASCII letters, digits and `-`, `_`, `.`, `~` are copied unchanged; every
/// other byte of the UTF-8 encoding is written as `%XX` with uppercase hex
/// digits (so a space becomes `%20`, not `+`).
fn url_encode_component(value: &str) -> String {
    // A lookup table avoids allocating a temporary `String` per escaped byte.
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    let mut output = String::with_capacity(value.len());
    for byte in value.bytes() {
        let is_unreserved =
            byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b'~');
        if is_unreserved {
            output.push(char::from(byte));
        } else {
            output.push('%');
            output.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
            output.push(char::from(HEX_UPPER[usize::from(byte & 0x0F)]));
        }
    }
    output
}
/// Extracts `sync_sequence_id` from a GraphQL batch response body.
///
/// The JSON guard is removed with `strip_json_guard`, then the payload is
/// parsed first as a single JSON document and, failing that, line by line as
/// newline-delimited JSON. The first id found is returned.
///
/// # Errors
///
/// Returns `MessengerError::State` when the payload is empty after stripping
/// or no `sync_sequence_id` can be found.
fn extract_sequence_id_from_graphql_response(text: &str) -> Result<u64, MessengerError> {
    let payload = strip_json_guard(text).trim();
    if payload.is_empty() {
        return Err(MessengerError::State(
            "empty graphql batch response".to_string(),
        ));
    }

    // Attempt 1: the whole payload is one JSON document.
    let from_whole = serde_json::from_str::<Value>(payload)
        .ok()
        .and_then(|document| find_sync_sequence_id(&document));
    if let Some(sequence_id) = from_whole {
        return Ok(sequence_id);
    }

    // Attempt 2: one JSON document per line; unparsable lines are skipped.
    payload
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .filter_map(|line| serde_json::from_str::<Value>(line).ok())
        .find_map(|document| find_sync_sequence_id(&document))
        .ok_or_else(|| {
            MessengerError::State("missing sync_sequence_id from graphql response".to_string())
        })
}
fn find_sync_sequence_id(value: &Value) -> Option<u64> {
match value {
Value::Object(map) => {
if let Some(sequence) = map.get("sync_sequence_id")
&& let Some(id) = value_to_u64(sequence)
{
return Some(id);
}
for nested in map.values() {
if let Some(id) = find_sync_sequence_id(nested) {
return Some(id);
}
}
None
}
Value::Array(items) => {
for item in items {
if let Some(id) = find_sync_sequence_id(item) {
return Some(id);
}
}
None
}
_ => None,
}
}
fn value_to_u64(value: &Value) -> Option<u64> {
match value {
Value::Number(number) => number.as_u64(),
Value::String(text) => text.parse::<u64>().ok(),
_ => None,
}
}
// Unit tests for the pure helpers: sequence-id extraction and form encoding.
#[cfg(test)]
mod tests {
use super::{
encode_form_fields,
extract_sequence_id_from_graphql_response,
};
// A guarded response whose payload is a single JSON array with a string-typed id.
#[test]
fn extracts_sequence_id_from_array_response() {
let text = r#"for (;;);[{"o0":{"data":{"viewer":{"message_threads":{"sync_sequence_id":"12345"}}}}}]"#;
let sequence_id = extract_sequence_id_from_graphql_response(text)
.unwrap_or_else(|error| panic!("failed to extract sequence id: {error}"));
assert_eq!(sequence_id, 12345);
}
// A guarded response made of several JSON documents, one per line.
#[test]
fn extracts_sequence_id_from_json_lines_response() {
let text = r#"for (;;);
{"o0":{"data":{"viewer":{"message_threads":{"sync_sequence_id":"777"}}}}}
{"q0":{"response":{"ok":true}}}
"#;
let sequence_id = extract_sequence_id_from_graphql_response(text)
.unwrap_or_else(|error| panic!("failed to extract sequence id: {error}"));
assert_eq!(sequence_id, 777);
}
// Reserved characters in values are percent-encoded; pairs are joined with `&`.
#[test]
fn encodes_form_fields() {
let fields = vec![("a", "value".to_string()), ("b", "a+b&c=d".to_string())];
let encoded = encode_form_fields(&fields);
assert_eq!(encoded, "a=value&b=a%2Bb%26c%3Dd");
}
}