kagi-sdk 0.1.0

Rust-first Kagi SDK with explicit official-api and session-web surfaces
Documentation
use scraper::{Html, Selector};

use crate::{
    error::KagiError,
    routing::{EndpointId, ParserShape},
    session_web::models::{SearchResponse, SearchResult},
};

pub fn parse_html_search_response(
    endpoint: EndpointId,
    html_body: &str,
) -> Result<SearchResponse, KagiError> {
    let document = Html::parse_document(html_body);

    let item_selectors = [
        "div.search-result",
        "div.__sri",
        "div.__srgi",
        "section.__srgi",
    ];
    let title_selector = parse_selector("a.__sri_title_link")?;
    let snippet_selector = parse_selector(".__sri-desc")?;

    let mut parsed_results = Vec::new();

    for item_selector in item_selectors {
        let selector = parse_selector(item_selector)?;
        for item in document.select(&selector) {
            let Some(link) = item.select(&title_selector).next() else {
                continue;
            };
            let Some(href) = link.value().attr("href") else {
                continue;
            };

            if !href.starts_with("http://") && !href.starts_with("https://") {
                continue;
            }

            let title = collect_text(&link);
            if title.is_empty() {
                continue;
            }

            let snippet = item
                .select(&snippet_selector)
                .next()
                .map(|node| collect_text(&node))
                .filter(|text| !text.is_empty());

            parsed_results.push(SearchResult {
                title,
                url: href.to_string(),
                snippet,
            });
        }

        if !parsed_results.is_empty() {
            return Ok(SearchResponse {
                results: deduplicate_results(parsed_results),
            });
        }
    }

    if looks_like_empty_search_results(&document, html_body)? {
        return Ok(SearchResponse {
            results: Vec::new(),
        });
    }

    Err(KagiError::ResponseParse {
        endpoint,
        parser: ParserShape::Html,
        reason: "response did not contain Kagi search-result markers (`.__sri_title_link`)"
            .to_string(),
    })
}

/// Parses a CSS selector string into a [`Selector`].
///
/// # Errors
///
/// Returns [`KagiError::InvalidClientConfiguration`] carrying the offending
/// selector text and the parser's error when `raw` is not a valid selector.
fn parse_selector(raw: &str) -> Result<Selector, KagiError> {
    match Selector::parse(raw) {
        Ok(selector) => Ok(selector),
        Err(source) => Err(KagiError::InvalidClientConfiguration {
            reason: format!("invalid built-in selector `{raw}`: {source}"),
        }),
    }
}

/// Returns the element's text with whitespace normalized.
///
/// Every text chunk yielded by the element is split on whitespace and the
/// resulting words are joined with single spaces, so the output has no leading,
/// trailing, or repeated whitespace. Yields an empty string when the element
/// contains no non-whitespace text.
fn collect_text(element: &scraper::ElementRef<'_>) -> String {
    let words: Vec<&str> = element
        .text()
        .flat_map(|chunk| chunk.split_whitespace())
        .collect();

    words.join(" ")
}

/// Removes repeated results while keeping the first occurrence of each.
///
/// Two results are considered duplicates only when both their `url` and their
/// `title` are equal; the relative order of the surviving results is unchanged.
fn deduplicate_results(results: Vec<SearchResult>) -> Vec<SearchResult> {
    let mut unique: Vec<SearchResult> = Vec::with_capacity(results.len());

    for candidate in results {
        // A candidate is new when it differs from every kept result in at
        // least one of the two identifying fields.
        let is_new = unique
            .iter()
            .all(|kept| kept.url != candidate.url || kept.title != candidate.title);

        if is_new {
            unique.push(candidate);
        }
    }

    unique
}

/// Heuristically decides whether the page is a search page with zero results.
///
/// The page qualifies when the parsed document contains one of the known
/// empty-state elements. Otherwise the ASCII-lowercased raw HTML must contain
/// both a "no results" style phrase and a marker that identifies it as a
/// search page.
///
/// # Errors
///
/// Propagates the error from `parse_selector` if the built-in empty-state
/// selector fails to parse.
fn looks_like_empty_search_results(document: &Html, raw_html: &str) -> Result<bool, KagiError> {
    const EMPTY_STATE_CSS: &str =
        ".search-no-results, .__search-no-results, .__empty-results, .__no-results";
    const NO_RESULT_PHRASES: [&str; 3] = [
        "no results",
        "no results found",
        "did not match any documents",
    ];
    const SEARCH_PAGE_MARKERS: [&str; 3] = ["/html/search", "name=\"q\"", "search-input"];

    let empty_state = parse_selector(EMPTY_STATE_CSS)?;
    if document.select(&empty_state).next().is_some() {
        return Ok(true);
    }

    // Fall back to a case-insensitive (ASCII only) scan of the raw markup.
    let lowered = raw_html.to_ascii_lowercase();
    let mentions_any = |needles: &[&str]| needles.iter().any(|needle| lowered.contains(*needle));

    Ok(mentions_any(&NO_RESULT_PHRASES) && mentions_any(&SEARCH_PAGE_MARKERS))
}