use crate::cost;
use crate::db::{BirdClient, RequestContext};
use crate::diag;
use crate::fields;
use crate::output;
use crate::requirements::AuthType;
use std::collections::HashSet;
/// Options for one `search` invocation, borrowed from the parsed CLI args.
pub struct SearchOpts<'a> {
    /// Raw user query; `apply_noise_reduction` may append `-is:retweet`.
    pub query: &'a str,
    /// Pretty-print the final JSON output.
    pub pretty: bool,
    /// Result ordering: `"recent"` (API order) or `"likes"`.
    pub sort: &'a str,
    /// When set, drop tweets whose `like_count` is below this threshold.
    pub min_likes: Option<u64>,
    /// Value sent as the `max_results` query parameter on each page.
    pub max_results: u32,
    /// Maximum number of pages to fetch before stopping.
    pub pages: u32,
}
/// Runs a paginated recent-tweet search and prints the merged results as JSON.
///
/// Fetches up to `opts.pages` pages, de-duplicating tweets and included user
/// objects across pages, skipping retweets and (optionally) tweets below
/// `opts.min_likes`, then ordering the combined set per `opts.sort`
/// (`"recent"` keeps API order, `"likes"` sorts descending by like count).
///
/// # Errors
/// Returns an error for an invalid `--sort` value, a non-success HTTP
/// response, or a response body that is not valid JSON.
pub fn run_search(
    client: &mut BirdClient,
    opts: SearchOpts<'_>,
    use_color: bool,
    quiet: bool,
    auth_type: &AuthType,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    if !matches!(opts.sort, "recent" | "likes") {
        return Err(format!(
            "invalid --sort value \"{}\"; expected: recent, likes",
            opts.sort
        )
        .into());
    }
    // Append `-is:retweet` unless the user already opted in/out explicitly.
    let effective_query = apply_noise_reduction(opts.query);
    let ctx = RequestContext {
        auth_type,
        username: None,
    };
    let mut all_tweets: Vec<serde_json::Value> = Vec::new();
    let mut seen_ids: HashSet<String> = HashSet::new();
    let mut all_users: Vec<serde_json::Value> = Vec::new();
    let mut seen_user_ids: HashSet<String> = HashSet::new();
    let mut next_token: Option<String> = None;
    let mut pages_fetched: u32 = 0;
    for page_num in 1..=opts.pages {
        let url = build_search_url(&effective_query, opts.max_results, next_token.as_deref());
        let response = client.get(&url, &ctx)?;
        if !response.is_success() {
            return Err(format!(
                "GET search {}: {}",
                response.status,
                output::sanitize_for_stderr(&response.body, 200)
            )
            .into());
        }
        let page = response.json.ok_or("invalid JSON from search")?;
        let estimate = cost::estimate_cost(&page, &url, response.cache_hit);
        cost::display_cost(&estimate, use_color, quiet);
        // Count this page as fetched *before* the empty-data break below, so
        // the summary diagnostic reflects requests actually made (previously a
        // fetched-but-empty page was not counted).
        pages_fetched = page_num;
        let data = match page.get("data").and_then(|d| d.as_array()) {
            Some(arr) if !arr.is_empty() => arr,
            _ => break,
        };
        let before = all_tweets.len();
        for tweet in data {
            let id = tweet.get("id").and_then(|v| v.as_str()).unwrap_or("");
            // Skip malformed entries and cross-page duplicates.
            if id.is_empty() || seen_ids.contains(id) {
                continue;
            }
            // Defense in depth: the query filter normally excludes retweets,
            // but a user-supplied `is:retweet` operator can re-enable them.
            if is_retweet(tweet) {
                continue;
            }
            if let Some(min) = opts.min_likes
                && extract_metric(tweet, "like_count") < min
            {
                continue;
            }
            seen_ids.insert(id.to_string());
            all_tweets.push(tweet.clone());
        }
        let passed = all_tweets.len() - before;
        // Merge expanded user objects, de-duplicated by user id across pages.
        if let Some(includes) = page.get("includes")
            && let Some(users) = includes.get("users").and_then(|u| u.as_array())
        {
            for user in users {
                let uid = user.get("id").and_then(|v| v.as_str()).unwrap_or("");
                if !uid.is_empty() && seen_user_ids.insert(uid.to_string()) {
                    all_users.push(user.clone());
                }
            }
        }
        diag!(
            quiet,
            "[search] page {}/{}: {} new tweets ({} total)",
            page_num,
            opts.pages,
            passed,
            all_tweets.len()
        );
        next_token = page
            .get("meta")
            .and_then(|m| m.get("next_token"))
            .and_then(|t| t.as_str())
            .map(String::from);
        if next_token.is_none() {
            break;
        }
        // Small pause between page requests; skipped after the final page.
        if page_num < opts.pages {
            std::thread::sleep(std::time::Duration::from_millis(150));
        }
    }
    sort_tweets(&mut all_tweets, opts.sort);
    let output = serde_json::json!({
        "data": all_tweets,
        "includes": { "users": all_users },
    });
    if opts.pretty {
        println!("{}", serde_json::to_string_pretty(&output)?);
    } else {
        println!("{}", serde_json::to_string(&output)?);
    }
    diag!(
        quiet,
        "[search] {} results | sorted by {} | {} pages fetched",
        all_tweets.len(),
        opts.sort,
        pages_fetched
    );
    Ok(())
}
/// Assembles the recent-search endpoint URL: the query, the shared tweet
/// field parameters, `max_results`, and an optional pagination token.
/// All values are form-encoded by the `url` crate's query-pair serializer.
fn build_search_url(query: &str, max_results: u32, next_token: Option<&str>) -> String {
    // The base endpoint is a constant, so parsing cannot fail.
    let mut endpoint = url::Url::parse("https://api.x.com/2/tweets/search/recent").unwrap();
    {
        // Scope the mutable query-pair builder so `endpoint` is usable after.
        let mut params = endpoint.query_pairs_mut();
        params.append_pair("query", query);
        for (name, value) in fields::tweet_query_params() {
            params.append_pair(name, value);
        }
        params.append_pair("max_results", &max_results.to_string());
        // Appended last, matching the original pair ordering.
        if let Some(token) = next_token {
            params.append_pair("next_token", token);
        }
    }
    endpoint.to_string()
}
/// Appends `-is:retweet` to the query unless the user already wrote an
/// explicit `is:retweet` or `-is:retweet` operator. Operators are matched
/// as whole whitespace-separated tokens, so substrings such as
/// `crisis:retweet` do not suppress the filter.
fn apply_noise_reduction(query: &str) -> String {
    let user_specified = query
        .split_whitespace()
        .any(|token| matches!(token, "is:retweet" | "-is:retweet"));
    if user_specified {
        query.to_owned()
    } else {
        format!("{query} -is:retweet")
    }
}
/// Returns true when any `referenced_tweets` entry has `"type": "retweeted"`.
/// Only that type counts: quoted tweets and tweets with no
/// `referenced_tweets` array return false.
fn is_retweet(tweet: &serde_json::Value) -> bool {
    let Some(refs) = tweet.get("referenced_tweets").and_then(|rt| rt.as_array()) else {
        return false;
    };
    refs.iter()
        .any(|entry| entry.get("type").and_then(|t| t.as_str()) == Some("retweeted"))
}
fn extract_metric(tweet: &serde_json::Value, metric_name: &str) -> u64 {
tweet
.get("public_metrics")
.and_then(|m| m.get(metric_name))
.and_then(|v| v.as_u64())
.unwrap_or(0)
}
/// Orders `tweets` in place per `sort_by`. `"recent"` (and any unrecognized
/// value) leaves the API-provided order untouched; `"likes"` sorts descending
/// by `public_metrics.like_count`. The sort is stable, so ties keep their
/// original relative order.
fn sort_tweets(tweets: &mut [serde_json::Value], sort_by: &str) {
    if sort_by == "likes" {
        // Reverse(key) yields the same comparator as b_likes.cmp(&a_likes).
        tweets.sort_by_key(|t| std::cmp::Reverse(extract_metric(t, "like_count")));
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // --- apply_noise_reduction: the retweet filter is appended by default...
    #[test]
    fn noise_reduction_appends_filter() {
        assert_eq!(apply_noise_reduction("rust lang"), "rust lang -is:retweet");
    }

    // ...and left alone when the user already wrote either operator form.
    #[test]
    fn noise_reduction_skips_when_present() {
        assert_eq!(apply_noise_reduction("rust is:retweet"), "rust is:retweet");
        assert_eq!(
            apply_noise_reduction("rust -is:retweet"),
            "rust -is:retweet"
        );
    }

    // --- build_search_url: endpoint, encoded query, field params, no token.
    #[test]
    fn build_url_basic() {
        let url = build_search_url("rust lang", 100, None);
        assert!(url.starts_with("https://api.x.com/2/tweets/search/recent?"));
        assert!(url.contains("query=rust+lang"));
        assert!(url.contains("max_results=100"));
        assert!(url.contains("tweet.fields="));
        assert!(!url.contains("next_token="));
    }

    // Pagination token appears when supplied.
    #[test]
    fn build_url_with_next_token() {
        let url = build_search_url("test", 50, Some("abc123"));
        assert!(url.contains("next_token=abc123"));
    }

    // Query values are form-encoded so they can't inject extra parameters.
    #[test]
    fn build_url_escapes_query() {
        let url = build_search_url("test&evil=true", 100, None);
        assert!(url.contains("query=test%26evil%3Dtrue"));
    }

    // --- is_retweet: only the "retweeted" reference type counts.
    #[test]
    fn is_retweet_detects_retweet() {
        let tweet = serde_json::json!({
            "id": "1",
            "referenced_tweets": [{"type": "retweeted", "id": "2"}]
        });
        assert!(is_retweet(&tweet));
    }

    // No referenced_tweets array at all -> not a retweet.
    #[test]
    fn is_retweet_passes_original() {
        let tweet = serde_json::json!({"id": "1", "text": "hello"});
        assert!(!is_retweet(&tweet));
    }

    // Quote tweets reference with type "quoted" and must not be filtered.
    #[test]
    fn is_retweet_passes_quote() {
        let tweet = serde_json::json!({
            "id": "1",
            "referenced_tweets": [{"type": "quoted", "id": "2"}]
        });
        assert!(!is_retweet(&tweet));
    }

    // --- extract_metric: reads any named counter from public_metrics...
    #[test]
    fn extract_metric_returns_value() {
        let tweet = serde_json::json!({
            "public_metrics": {"like_count": 42, "retweet_count": 5}
        });
        assert_eq!(extract_metric(&tweet, "like_count"), 42);
        assert_eq!(extract_metric(&tweet, "retweet_count"), 5);
    }

    // ...and defaults to zero when the metrics object is absent.
    #[test]
    fn extract_metric_missing_returns_zero() {
        let tweet = serde_json::json!({"id": "1"});
        assert_eq!(extract_metric(&tweet, "like_count"), 0);
    }

    // --- sort_tweets: "likes" orders descending by like_count.
    #[test]
    fn sort_by_likes() {
        let mut tweets = vec![
            serde_json::json!({"id": "1", "public_metrics": {"like_count": 5}}),
            serde_json::json!({"id": "2", "public_metrics": {"like_count": 100}}),
            serde_json::json!({"id": "3", "public_metrics": {"like_count": 20}}),
        ];
        sort_tweets(&mut tweets, "likes");
        assert_eq!(tweets[0]["id"], "2");
        assert_eq!(tweets[1]["id"], "3");
        assert_eq!(tweets[2]["id"], "1");
    }

    // "recent" must preserve the order the API returned.
    #[test]
    fn sort_by_recent_is_noop() {
        let mut tweets = vec![
            serde_json::json!({"id": "3"}),
            serde_json::json!({"id": "1"}),
            serde_json::json!({"id": "2"}),
        ];
        let original = tweets.clone();
        sort_tweets(&mut tweets, "recent");
        assert_eq!(tweets, original);
    }

    // Operator detection is per-token: substrings don't suppress the filter.
    #[test]
    fn noise_reduction_ignores_substrings() {
        assert_eq!(
            apply_noise_reduction("crisis:retweet analysis"),
            "crisis:retweet analysis -is:retweet"
        );
    }
}