use std::fmt::Write as _;
use super::formalize::CANONICAL_FISHERMAN_SYNOPSIS;
use super::meaning_detail::{self, CONCEPTS};
use super::planner::CANONICAL_SOURCE_URL;
/// One document in the in-memory corpus served by `web_search` and `web_fetch`.
struct CorpusPage {
/// Address of the page; `web_fetch` compares it to the requested URL with `==`.
url: String,
/// Human-readable title printed on the first line of a search result.
title: String,
/// Search terms; a page is a hit when any of them occurs as a substring of
/// the lowercased query.
keywords: Vec<String>,
/// Full page text: returned by `web_fetch`, and its first 80 characters are
/// used as the snippet in search results.
body: String,
}
/// Assembles the searchable corpus from scratch.
///
/// The first entry is the page for the canonical fisherman tale; it is followed
/// by one page per entry of `CONCEPTS`, in iteration order. Nothing is cached,
/// so every call rebuilds the whole list.
fn pages() -> Vec<CorpusPage> {
    // Search terms attached to the tale page (already lowercase).
    const TALE_TERMS: &[&str] = &[
        "рыбак",
        "рыбке",
        "рыбка",
        "пушкин",
        "сказка",
        "fisherman",
        "fish",
        "pushkin",
    ];
    // Terms appended to every concept page after the concept's own keywords.
    const LEXEME_TERMS: &[&str] = &["wikidata", "lexeme", "grammatical", "singular", "plural"];

    let tale = CorpusPage {
        url: CANONICAL_SOURCE_URL.to_owned(),
        title: "Сказка о рыбаке и рыбке — Александр Пушкин (Викитека)".to_owned(),
        keywords: TALE_TERMS.iter().copied().map(str::to_owned).collect(),
        body: CANONICAL_FISHERMAN_SYNOPSIS.to_owned(),
    };
    let mut corpus = vec![tale];

    for concept in CONCEPTS {
        // The concept's own keywords are lowercased so they can be compared
        // against a lowercased query; the shared terms are lowercase already.
        let own_terms = concept.keywords.iter().map(|term| term.to_lowercase());
        let shared_terms = LEXEME_TERMS.iter().copied().map(str::to_owned);
        let keywords: Vec<String> = own_terms.chain(shared_terms).collect();

        let title = format!(
            "Wikidata lexemes for the {} concept ({}) — Wikidata",
            concept.name, concept.grounded_in
        );
        let url = concept.source_url.to_owned();

        // `concept` is handed to `source_bundle` last, after every field read.
        corpus.push(CorpusPage {
            url,
            title,
            keywords,
            body: meaning_detail::source_bundle(concept),
        });
    }

    corpus
}
#[must_use]
pub fn web_search(query: &str) -> String {
let lower = query.to_lowercase();
let all = pages();
let hits: Vec<&CorpusPage> = all
.iter()
.filter(|page| {
page.keywords
.iter()
.any(|keyword| lower.contains(keyword.as_str()))
})
.collect();
if hits.is_empty() {
return format!("web_search: no results for {query:?}");
}
let mut out = String::new();
for (rank, page) in hits.iter().enumerate() {
let snippet: String = page.body.chars().take(80).collect();
let _ = writeln!(
out,
"{}. {}\n {}\n {}…",
rank + 1,
page.title,
page.url,
snippet.trim()
);
}
out.trim_end().to_owned()
}
/// Returns the body of the first corpus page whose URL equals `url` exactly.
///
/// When no page has that URL, the result is the text
/// `web_fetch error: 404 not found for <url>` rather than an error value.
#[must_use]
pub fn web_fetch(url: &str) -> String {
    // The corpus is owned by this call, so the matching page's body is moved out.
    let found = pages().into_iter().find(|page| page.url == url);
    found.map_or_else(
        || format!("web_fetch error: 404 not found for {url}"),
        |page| page.body,
    )
}