interpolize 1.0.0

A Rust program that scrapes Discord, learns how your friends talk, and generates new messages in their collective voice. Yes, this is what we're doing with our lives.
use anyhow::{Result, anyhow};
use reqwest::{Client, header::{HeaderMap, HeaderValue}};
use serde::Deserialize;
use crate::config::{Config, Channel};
use crate::storage::Message;

/// The fields of a message that the scraper reads from the JSON returned by
/// the channel messages request in `Scraper::scrape_channel`.
///
/// No `deny_unknown_fields` attribute is set, so any additional fields in the
/// payload are ignored during deserialization.
#[derive(Deserialize)]
struct DiscordMessage {
    /// Message ID; the ID of the last message in a page is also used as the
    /// `before` pagination cursor for the next request.
    id: String,
    /// Message text. Messages whose content is empty are skipped by the scraper.
    content: String,
    /// Timestamp kept as the raw string from the response (it is not parsed here).
    timestamp: String,
    /// Author of the message.
    author: DiscordAuthor,
    /// The message this one references, if present. Only its `id` is used
    /// (stored as `reply_to`). Boxed because the type is recursive.
    referenced_message: Option<Box<DiscordMessage>>,
}

/// The author fields of a message that the scraper reads from the response.
#[derive(Deserialize)]
struct DiscordAuthor {
    /// Author ID, copied into `Message::author_id`.
    id: String,
    /// Author username, copied into `Message::author_name`.
    username: String,
}

/// Fetches channel message history over HTTP and converts it into storage
/// `Message` records.
pub struct Scraper {
    /// HTTP client built in `Scraper::new` with a fixed set of default headers.
    client: Client,
    /// Token sent verbatim as the `Authorization` header value on every request.
    token: String,
}

impl Scraper {
    /// Builds a scraper whose HTTP client sends a fixed set of browser-like
    /// default headers with every request.
    ///
    /// The token is copied out of `config.discord.token`. It is attached per
    /// request in [`Scraper::scrape_channel`] rather than stored in the
    /// client's default headers.
    ///
    /// # Errors
    ///
    /// Returns an error if the underlying HTTP client cannot be built.
    pub fn new(config: &Config) -> Result<Self> {
        let mut headers = HeaderMap::new();
        headers.insert("User-Agent", HeaderValue::from_static(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0"
        ));
        headers.insert("Accept", HeaderValue::from_static("*/*"));
        headers.insert("Accept-Language", HeaderValue::from_static("en-US,en;q=0.5"));
        headers.insert("X-Discord-Locale", HeaderValue::from_static("en-US"));
        headers.insert("X-Debug-Options", HeaderValue::from_static("bugReporterEnabled"));
        headers.insert("Sec-Fetch-Dest", HeaderValue::from_static("empty"));
        headers.insert("Sec-Fetch-Mode", HeaderValue::from_static("cors"));
        headers.insert("Sec-Fetch-Site", HeaderValue::from_static("same-origin"));

        let client = Client::builder()
            .default_headers(headers)
            .build()?;

        Ok(Self {
            client,
            token: config.discord.token.clone(),
        })
    }

    /// Fetches up to `channel.scrape_limit` messages from one channel, paging
    /// backwards with the `before` cursor, and returns those with non-empty
    /// content.
    ///
    /// `scrape_limit` bounds the number of messages *fetched*; messages with
    /// empty content count against the limit but are not returned, so the
    /// result may be shorter than the limit.
    ///
    /// An HTTP 429 response is retried a bounded number of times, waiting for
    /// the duration given by the `Retry-After` header (capped) before each
    /// retry. The function pauses 500 ms after every non-empty page.
    ///
    /// # Errors
    ///
    /// Returns an error if a request fails to send, if the final response
    /// status is not a success (including a 429 that persists after all
    /// retries), or if the response body is not a JSON array of messages in
    /// the expected shape.
    pub async fn scrape_channel(&self, channel: &Channel) -> Result<Vec<Message>> {
        // Page size requested per call; see the endpoint documentation for its cap.
        const PAGE_SIZE: usize = 100;
        // How many consecutive 429 responses are retried for a single page.
        const MAX_RATE_LIMIT_RETRIES: u32 = 5;
        // Wait used when a 429 carries no usable `Retry-After` value.
        const DEFAULT_RETRY_SECS: f64 = 1.0;
        // Upper bound on a single rate-limit wait, so a huge header value cannot stall the run.
        const MAX_RETRY_SECS: f64 = 60.0;

        let mut messages = Vec::new();
        let mut before: Option<String> = None;
        let mut fetched: usize = 0;

        loop {
            // saturating_sub: if a page ever contains more items than requested,
            // `fetched` can exceed the limit; a plain `-` would then panic in
            // debug builds and wrap to a huge value in release builds.
            let remaining = channel.scrape_limit.saturating_sub(fetched);
            if remaining == 0 {
                break;
            }
            let limit = remaining.min(PAGE_SIZE);

            let url = match before {
                Some(ref cursor) => format!(
                    "https://discord.com/api/v9/channels/{}/messages?limit={}&before={}",
                    channel.id, limit, cursor
                ),
                None => format!(
                    "https://discord.com/api/v9/channels/{}/messages?limit={}",
                    channel.id, limit
                ),
            };

            // Send the request, retrying while the server answers 429. Once the
            // retry budget is spent the 429 response falls through to the
            // generic status check below and is reported as an error.
            let mut rate_limit_retries = 0u32;
            let resp = loop {
                let resp = self
                    .client
                    .get(&url)
                    .header("Authorization", &self.token)
                    .send()
                    .await?;

                if resp.status().as_u16() != 429 || rate_limit_retries >= MAX_RATE_LIMIT_RETRIES {
                    break resp;
                }
                rate_limit_retries += 1;

                // Reject negative / non-finite values: `Duration::from_secs_f64`
                // panics on them.
                let wait_secs = resp
                    .headers()
                    .get("Retry-After")
                    .and_then(|value| value.to_str().ok())
                    .and_then(|text| text.trim().parse::<f64>().ok())
                    .filter(|secs| secs.is_finite() && *secs >= 0.0)
                    .map_or(DEFAULT_RETRY_SECS, |secs| secs.min(MAX_RETRY_SECS));

                eprintln!("  rate limited, retrying in {:.1}s", wait_secs);
                tokio::time::sleep(std::time::Duration::from_secs_f64(wait_secs)).await;
            };

            if !resp.status().is_success() {
                return Err(anyhow!("discord api error: {}", resp.status()));
            }

            let batch: Vec<DiscordMessage> = resp.json().await?;

            // The last message of the page becomes the cursor for the next
            // request; an empty page means there is nothing further to fetch.
            match batch.last() {
                Some(last) => before = Some(last.id.clone()),
                None => break,
            }
            fetched += batch.len();

            messages.extend(
                batch
                    .into_iter()
                    .filter(|dm| !dm.content.is_empty())
                    .map(|dm| Message {
                        id: dm.id,
                        channel_id: channel.id.clone(),
                        channel_name: channel.name.clone(),
                        author_id: dm.author.id,
                        author_name: dm.author.username,
                        content: dm.content,
                        timestamp: dm.timestamp,
                        // An empty string stands for "not a reply".
                        reply_to: dm.referenced_message.map(|r| r.id).unwrap_or_default(),
                    }),
            );

            // Fixed pause between pages to pace requests.
            tokio::time::sleep(std::time::Duration::from_millis(500)).await;
        }

        Ok(messages)
    }

    /// Scrapes every channel in `config.channels` in order and returns all
    /// collected messages in a single vector.
    ///
    /// Progress is reported on stderr, one line when a channel starts and one
    /// with the number of messages kept.
    ///
    /// # Errors
    ///
    /// Returns the first error from [`Scraper::scrape_channel`]; messages
    /// gathered from earlier channels are dropped in that case.
    pub async fn scrape_all(&self, config: &Config) -> Result<Vec<Message>> {
        let mut all = Vec::new();
        for channel in &config.channels {
            eprintln!("scraping #{} ...", channel.name);
            let msgs = self.scrape_channel(channel).await?;
            eprintln!("  got {} msgs", msgs.len());
            all.extend(msgs);
        }
        Ok(all)
    }
}