1use axum::Json;
13use axum::extract::{Path, Query, State};
14use crw_core::error::CrwError;
15use crw_core::research_types::{
16 GithubResponse, PaperMetaResponse, PapersResponse, ReadPaperResponse, ResearchGithubItem,
17 ResearchPassage, SimilarResponse,
18};
19use crw_search::research::{self, Mode, PaperHit, ResearchKeys, SearchFilters};
20use crw_search::{SearxngClient, SearxngParams};
21use serde::Deserialize;
22use std::sync::Arc;
23
24use crate::error::AppError;
25use crate::state::AppState;
26
/// Result count used by [`clamp_k`] when a request does not supply `k`.
const DEFAULT_K: usize = 40;
/// Upper bound that [`clamp_k`] enforces on a requested `k`.
const MAX_K: usize = 100;
29
30fn keys(state: &AppState) -> ResearchKeys<'_> {
31 ResearchKeys {
32 openalex_key: state.config.search.openalex_api_key.as_deref(),
33 openalex_mailto: state.config.search.openalex_mailto.as_deref(),
34 s2_key: state.config.search.s2_api_key.as_deref(),
35 }
36}
37
38fn searxng(state: &AppState) -> Result<Arc<SearxngClient>, CrwError> {
39 state.searxng.as_ref().cloned().ok_or_else(|| {
40 CrwError::SearchDisabled(
41 "Search is disabled. Set [search].searxng_url or CRW_SEARCH__SEARXNG_URL.".into(),
42 )
43 })
44}
45
46fn clamp_k(k: Option<usize>) -> usize {
47 k.unwrap_or(DEFAULT_K).clamp(1, MAX_K)
48}
49
/// Joins `v` with commas, or returns `None` when the slice has no elements.
///
/// Only the slice length is checked: a slice of empty strings still yields `Some`.
fn join_nonempty(v: &[String]) -> Option<String> {
    (!v.is_empty()).then(|| v.join(","))
}
59
60async fn searxng_papers(
63 client: &SearxngClient,
64 engines: Option<String>,
65 query: &str,
66) -> Vec<PaperHit> {
67 let params = SearxngParams {
68 q: query.to_string(),
69 categories: None,
70 language: Some("en".to_string()),
71 time_range: None,
72 engines,
73 pageno: None,
74 safesearch: None,
75 };
76 let Ok(resp) = client.fetch(¶ms).await else {
77 return Vec::new();
78 };
79 resp.results
80 .into_iter()
81 .filter_map(|r| {
82 let title = r.title.clone().unwrap_or_default();
83 let blob = format!(
84 "{} {} {}",
85 r.url.unwrap_or_default(),
86 title,
87 r.content.unwrap_or_default()
88 );
89 PaperHit::from_searxng(&title, &blob, r.score.unwrap_or(0.0))
90 })
91 .collect()
92}
93
94#[derive(Deserialize)]
95pub struct PapersQuery {
96 query: String,
97 k: Option<usize>,
98 authors: Option<String>,
99 categories: Option<String>,
100 from: Option<String>,
101 to: Option<String>,
102}
103
104pub async fn search_papers(
108 State(state): State<AppState>,
109 Query(q): Query<PapersQuery>,
110) -> Result<Json<PapersResponse>, AppError> {
111 let client = searxng(&state)?;
112 let k = clamp_k(q.k);
113 let f = SearchFilters {
114 authors: q.authors,
115 categories: q.categories,
116 from: q.from,
117 to: q.to,
118 };
119 let kz = keys(&state);
120 let research_engines = join_nonempty(&state.config.search.research_engines);
121 let (web, scholar, oa_ss) = tokio::join!(
123 searxng_papers(&client, None, &q.query),
124 searxng_papers(&client, research_engines, &q.query),
125 research::search_papers_pools(&kz, &q.query, k, &f),
126 );
127 let mut pools = vec![web, scholar];
128 pools.extend(oa_ss);
129 let results = research::merge_rank(pools, k);
130 Ok(Json(PapersResponse {
131 success: true,
132 results,
133 }))
134}
135
136#[derive(Deserialize)]
137pub struct PaperQuery {
138 query: Option<String>,
139 k: Option<usize>,
140}
141
142pub async fn get_paper(
151 State(state): State<AppState>,
152 Path(id): Path<String>,
153 Query(q): Query<PaperQuery>,
154) -> Result<Json<serde_json::Value>, AppError> {
155 let kz = keys(&state);
156 let meta = research::inspect(&kz, &id)
157 .await
158 .ok_or_else(|| CrwError::NotFound(format!("paper not found: {id}")))?;
159 match q.query {
160 None => Ok(Json(
161 serde_json::to_value(PaperMetaResponse {
162 success: true,
163 paper: meta,
164 })
165 .map_err(|e| CrwError::Internal(e.to_string()))?,
166 )),
167 Some(query) => {
168 let k = q.k.unwrap_or(4).clamp(1, 20);
169 let passages = rank_abstract_passages(meta.abstract_.as_deref(), &query, k);
170 Ok(Json(
171 serde_json::to_value(ReadPaperResponse {
172 success: true,
173 paper_id: meta.paper_id.clone(),
174 query,
175 passages,
176 paper: meta,
177 })
178 .map_err(|e| CrwError::Internal(e.to_string()))?,
179 ))
180 }
181 }
182}
183
184fn rank_abstract_passages(abstract_: Option<&str>, query: &str, k: usize) -> Vec<ResearchPassage> {
187 let Some(text) = abstract_ else {
188 return Vec::new();
189 };
190 let qterms: Vec<String> = query
191 .to_lowercase()
192 .split_whitespace()
193 .filter(|w| w.len() > 2)
194 .map(String::from)
195 .collect();
196 let mut scored: Vec<ResearchPassage> = text
197 .split(['.', '!', '?'])
198 .map(str::trim)
199 .filter(|s| s.len() > 20)
200 .map(|sentence| {
201 let low = sentence.to_lowercase();
202 let hits = qterms.iter().filter(|t| low.contains(*t)).count();
203 let score = if qterms.is_empty() {
204 0.0
205 } else {
206 hits as f64 / qterms.len() as f64
207 };
208 ResearchPassage {
209 text: sentence.to_string(),
210 score,
211 }
212 })
213 .collect();
214 scored.sort_by(|a, b| {
215 b.score
216 .partial_cmp(&a.score)
217 .unwrap_or(std::cmp::Ordering::Equal)
218 });
219 scored.truncate(k);
220 scored
221}
222
223#[derive(Deserialize)]
224pub struct SimilarQuery {
225 intent: Option<String>,
226 mode: Option<String>,
227 k: Option<usize>,
228}
229
230pub async fn similar(
232 State(state): State<AppState>,
233 Path(id): Path<String>,
234 Query(q): Query<SimilarQuery>,
235) -> Result<Json<SimilarResponse>, AppError> {
236 if q.intent.as_deref().unwrap_or("").trim().is_empty() {
238 return Err(CrwError::InvalidRequest("`intent` is required".into()).into());
239 }
240 let k = clamp_k(q.k);
241 let mode = match q.mode.as_deref() {
242 Some("citers") => Mode::Citers,
243 Some("references") => Mode::References,
244 _ => Mode::Similar,
245 };
246 let kz = keys(&state);
247 let results = research::related(&kz, &id, mode, k).await;
248 let pool_size = results.len();
249 Ok(Json(SimilarResponse {
250 success: true,
251 results,
252 pool_size,
253 truncated: pool_size >= k,
254 note: None,
255 }))
256}
257
258#[derive(Deserialize)]
259pub struct GithubQuery {
260 query: String,
261 k: Option<usize>,
262}
263
264pub async fn github(
269 State(state): State<AppState>,
270 Query(q): Query<GithubQuery>,
271) -> Result<Json<GithubResponse>, AppError> {
272 let client = searxng(&state)?;
273 let k = q.k.unwrap_or(20).clamp(1, 100);
274 let params = SearxngParams {
275 q: q.query,
276 categories: None,
277 language: Some("en".to_string()),
278 time_range: None,
279 engines: join_nonempty(&state.config.search.github_engines),
280 pageno: None,
281 safesearch: None,
282 };
283 let resp = client
284 .fetch(¶ms)
285 .await
286 .map_err(|e| CrwError::HttpError(format!("github search failed: {e}")))?;
287 let results: Vec<ResearchGithubItem> = resp
288 .results
289 .into_iter()
290 .take(k)
291 .filter_map(|r| {
292 let url = r.url?;
293 let repo = url
295 .split("github.com/")
296 .nth(1)
297 .map(|p| p.split('/').take(2).collect::<Vec<_>>().join("/"))
298 .unwrap_or_default();
299 Some(ResearchGithubItem {
300 result_type: "repo_readme".to_string(),
301 repo,
302 url,
303 page_type: None,
304 number: None,
305 segment_count: None,
306 readme_url: None,
307 title: r.title.unwrap_or_default(),
308 snippet: r.content.clone().unwrap_or_default(),
309 content_md: r.content,
310 })
311 })
312 .collect();
313 Ok(Json(GithubResponse {
314 success: true,
315 results,
316 }))
317}