use crate::types::CodeChunk;
use anyhow::{Context, Result};
use futures_util::stream::{FuturesOrdered, StreamExt};
use serde::{Deserialize, Serialize};
use std::time::Duration;
/// Page size sent as the `limit` query parameter of every chunk-page request.
/// `get_chunks` treats a page with fewer items than this as the end of pagination.
const CHUNK_PAGE_LIMIT: usize = 1000;
/// Number of chunk-page requests `get_chunks` issues together per window.
/// Also passed to the HTTP client builder as the maximum idle connections kept per host.
const CHUNK_PAGE_CONCURRENCY: usize = 4;
/// Summary of one index, as produced by `TrustySearchClient::list_indexes`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexSummary {
/// Index identifier, copied verbatim from the `indexes` array of the listing response.
pub id: String,
}
/// Client for the HTTP API rooted at `base_url`.
///
/// Endpoints used by this type: `/health`, `/indexes` and `/indexes/{id}/chunks`.
#[derive(Clone)]
pub struct TrustySearchClient {
/// Root URL of the service; `new` removes one trailing `/` if present.
base_url: String,
/// Underlying reqwest client, configured once in `new` and reused for every request.
http: reqwest::Client,
}
impl TrustySearchClient {
    /// Creates a client for the service rooted at `base_url`.
    ///
    /// At most one trailing `/` is removed from `base_url`, so endpoint paths can be
    /// appended as `"{base_url}/..."`.
    ///
    /// The HTTP client is built with a 60 s TCP keepalive, at most
    /// `CHUNK_PAGE_CONCURRENCY` idle connections per host, HTTP/2 prior knowledge,
    /// a 30 s request timeout and a 5 s connect timeout.
    ///
    /// # Panics
    ///
    /// Panics if the underlying reqwest client cannot be built.
    pub fn new(base_url: impl Into<String>) -> Self {
        let mut root: String = base_url.into();
        // Drop exactly one trailing slash, if there is one.
        if let Some(kept_len) = root.strip_suffix('/').map(str::len) {
            root.truncate(kept_len);
        }

        let keepalive = Duration::from_secs(60);
        let request_timeout = Duration::from_secs(30);
        let connect_timeout = Duration::from_secs(5);

        // NOTE(review): `http2_prior_knowledge` makes the client speak HTTP/2 without
        // negotiation, so the server is presumably expected to accept that — confirm.
        let http = reqwest::Client::builder()
            .tcp_keepalive(keepalive)
            .pool_max_idle_per_host(CHUNK_PAGE_CONCURRENCY)
            .http2_prior_knowledge()
            .timeout(request_timeout)
            .connect_timeout(connect_timeout)
            .build()
            .expect("failed to build HTTP client");

        Self {
            base_url: root,
            http,
        }
    }

    /// Returns the base URL this client was created with (after trailing-slash removal).
    pub fn base_url(&self) -> &str {
        self.base_url.as_str()
    }

    /// Probes `GET {base_url}/health`.
    ///
    /// Returns `Ok(true)` when the response status is a success status and `Ok(false)`
    /// for any other status.
    ///
    /// # Errors
    ///
    /// Returns an error if the request itself cannot be sent or completed.
    pub async fn health(&self) -> Result<bool> {
        let endpoint = format!("{}/health", self.base_url);
        let status = self
            .http
            .get(&endpoint)
            .send()
            .await
            .context("GET /health")?
            .status();
        Ok(status.is_success())
    }

    /// Lists the indexes reported by `GET {base_url}/indexes`.
    ///
    /// The response body is decoded as JSON with an `indexes` array of strings; each
    /// string becomes the `id` of one [`IndexSummary`], in response order.
    ///
    /// # Errors
    ///
    /// Returns an error if the request fails, the status is not a success status, or
    /// the body cannot be decoded.
    pub async fn list_indexes(&self) -> Result<Vec<IndexSummary>> {
        /// Wire shape of the listing response.
        #[derive(Deserialize)]
        struct IndexListing {
            indexes: Vec<String>,
        }

        let url = format!("{}/indexes", self.base_url);

        let response = self
            .http
            .get(&url)
            .send()
            .await
            .with_context(|| format!("GET {url}"))?;
        let response = response
            .error_for_status()
            .with_context(|| format!("non-2xx from {url}"))?;
        let IndexListing { indexes } = response
            .json::<IndexListing>()
            .await
            .with_context(|| format!("decode {url}"))?;

        let summaries: Vec<IndexSummary> = indexes
            .into_iter()
            .map(|id| IndexSummary { id })
            .collect();
        Ok(summaries)
    }

    /// Fetches every chunk of `index_id` from `GET {base_url}/indexes/{index_id}/chunks`.
    ///
    /// Pages of `CHUNK_PAGE_LIMIT` items are requested in windows of
    /// `CHUNK_PAGE_CONCURRENCY` consecutive offsets. Pages within a window are awaited
    /// and appended in offset order. A window containing a page shorter than
    /// `CHUNK_PAGE_LIMIT` is the last window fetched.
    ///
    /// # Errors
    ///
    /// Returns the first error, in offset order, produced by any page request of the
    /// current window.
    pub async fn get_chunks(&self, index_id: &str) -> Result<Vec<CodeChunk>> {
        // NOTE(review): `index_id` is interpolated into the path verbatim (no escaping);
        // presumably ids are URL-safe — verify against callers.
        let endpoint = format!("{}/indexes/{}/chunks", self.base_url, index_id);
        let window_span = CHUNK_PAGE_CONCURRENCY * CHUNK_PAGE_LIMIT;

        let mut collected: Vec<CodeChunk> = Vec::new();
        let mut window_start: usize = 0;

        loop {
            // Build all page requests of this window up front; `FuturesOrdered`
            // yields their results in submission (offset) order.
            let mut in_flight: FuturesOrdered<_> = (0..CHUNK_PAGE_CONCURRENCY)
                .map(|slot| {
                    let offset = window_start + slot * CHUNK_PAGE_LIMIT;
                    fetch_chunk_page(&self.http, &endpoint, offset, CHUNK_PAGE_LIMIT)
                })
                .collect();

            // Every page of the window is consumed, including the ones that follow a
            // short page; their contents are appended too and their errors propagate.
            let mut saw_short_page = false;
            while let Some(outcome) = in_flight.next().await {
                let page = outcome?;
                saw_short_page |= page.len() < CHUNK_PAGE_LIMIT;
                collected.extend(page);
            }

            if saw_short_page {
                return Ok(collected);
            }
            // All pages were full, so the whole window was consumed.
            window_start += window_span;
        }
    }
}
/// Fetches a single page of chunks from `{base_url}?offset={offset}&limit={limit}`.
///
/// The response body is decoded as JSON with a `chunks` array, which is returned as-is.
///
/// # Errors
///
/// Returns an error if the request fails, the status is not a success status, or the
/// body cannot be decoded.
async fn fetch_chunk_page(
    http: &reqwest::Client,
    base_url: &str,
    offset: usize,
    limit: usize,
) -> Result<Vec<CodeChunk>> {
    /// Wire shape of one chunk-page response.
    #[derive(Deserialize)]
    struct PageEnvelope {
        chunks: Vec<CodeChunk>,
    }

    let url = format!("{base_url}?offset={offset}&limit={limit}");

    let response = http
        .get(&url)
        .send()
        .await
        .with_context(|| format!("GET {url}"))?;
    let response = response
        .error_for_status()
        .with_context(|| format!("non-2xx from {url}"))?;
    let PageEnvelope { chunks } = response
        .json::<PageEnvelope>()
        .await
        .with_context(|| format!("decode {url}"))?;

    Ok(chunks)
}