use super::{HandleError, RawResponse, get_or, get_pool, handler};
use crate::CONFIG;
use crate::models::chunk::ChunkDetail;
use crate::models::repo::{self, RepoOperation};
use ragit::{
ChunkSource,
Index,
Keywords,
LoadMode,
TfidfResult,
UidQueryConfig,
};
use ragit_fs::{join3, write_log};
use std::collections::HashMap;
use warp::reply::{Reply, json};
/// Route-facing entry point for chunk search.
///
/// Awaits [`search_`] with the same arguments and passes its result to
/// `handler`, which turns it into the boxed reply returned to the caller.
pub async fn search(user: String, repo: String, query: HashMap<String, String>, api_key: Option<String>) -> Box<dyn Reply> {
    let response = search_(user, repo, query, api_key).await;
    handler(response)
}
/// Searches the chunks of the repository `user`/`repo` and replies with a JSON
/// array of `ChunkDetail`s.
///
/// Recognized keys of `query` (all optional):
/// - `limit` (default 100) / `offset` (default 0): pagination of the result.
/// - `uid`: only chunks matched by `Index::uid_query` for this value are considered.
/// - `file`: only chunks whose source path equals this value, or lives under it
///   when treated as a directory, are considered. When `uid` is given as well,
///   `file` filters the chunks found by `uid`.
/// - `query`: keywords; when present the result is ranked by tfidf instead of
///   being sorted by `sortable_string()`.
///
/// # Errors
///
/// The error codes passed to `handle_error` are:
/// - 404 when the repository id cannot be resolved, when the second stage of the
///   auth check fails (presumably "read access denied" -- confirm against
///   `repo::check_auth`), or when the index cannot be loaded,
/// - 400 when the repository path cannot be built,
/// - 500 for a failed auth lookup, a missing `CONFIG`, and any index query failure.
pub async fn search_(user: String, repo: String, query: HashMap<String, String>, api_key: Option<String>) -> RawResponse {
    let pool = get_pool().await;
    let repo_id = repo::get_id(&user, &repo, pool).await.handle_error(404)?;
    repo::check_auth(repo_id, RepoOperation::Read, api_key, pool).await.handle_error(500)?.handle_error(404)?;
    let config = CONFIG.get().handle_error(500)?;
    let rag_path = join3(
        &config.repo_data_dir,
        &user,
        &repo,
    ).handle_error(400)?;
    let index = Index::load(rag_path, LoadMode::OnlyJson).handle_error(404)?;

    write_log(
        "search",
        &format!("search({user:?}, {repo:?}, {query:?})"),
    );

    let limit: usize = get_or(&query, "limit", 100);
    let offset: usize = get_or(&query, "offset", 0);
    // Both values come straight from the request, so their sum must not be
    // allowed to overflow (panic in debug builds, wrap-around in release builds).
    let fetch_count = limit.saturating_add(offset);

    let file = get_or(&query, "file", String::new());
    // `file` with a guaranteed trailing slash, used for "is inside this directory" checks.
    let dir = if file.ends_with('/') { file.clone() } else { format!("{file}/") };
    let uid_prefix = get_or(&query, "uid", String::new());
    let keywords = get_or(&query, "query", String::new());
    let tokenized_keywords = Keywords::from_raw(vec![keywords.clone()]);

    // Set once tfidf has produced the chunks: they are then already ordered by
    // relevance and already cut down to the requested page.
    let mut is_ranked = false;

    let mut chunks = if !uid_prefix.is_empty() {
        let matched_uids = index
            .uid_query(&[uid_prefix.clone()], UidQueryConfig::new().chunk_only())
            .handle_error(500)?
            .get_chunk_uids();
        let mut chunks = Vec::with_capacity(matched_uids.len());

        for uid in matched_uids.iter() {
            chunks.push(index.get_chunk_by_uid(*uid).handle_error(500)?);
        }

        if !file.is_empty() {
            chunks.retain(
                |chunk| match &chunk.source {
                    ChunkSource::File { path, .. } => path == &file || path.starts_with(&dir),
                }
            );
        }

        chunks
    }

    else if !file.is_empty() {
        let chunk_uids = match index.processed_files.get(&file) {
            // `file` names a single processed file.
            Some(uid) => index.get_chunks_of_file(*uid).handle_error(500)?,

            // Otherwise treat it as a directory and gather every file below it.
            None => {
                let mut chunk_uids = vec![];

                for (path, uid) in index.processed_files.iter() {
                    if path.starts_with(&dir) {
                        chunk_uids.append(&mut index.get_chunks_of_file(*uid).handle_error(500)?);
                    }
                }

                chunk_uids
            },
        };
        let mut chunks = Vec::with_capacity(chunk_uids.len());

        for uid in chunk_uids.iter() {
            chunks.push(index.get_chunk_by_uid(*uid).handle_error(500)?);
        }

        chunks
    }

    else if !keywords.is_empty() {
        // No pre-filter: rank over the whole index.
        let result = index.run_tfidf(
            tokenized_keywords.clone(),
            fetch_count,
        ).handle_error(500)?;
        let mut chunks = Vec::with_capacity(limit.min(result.len().saturating_sub(offset)));

        for TfidfResult { id: uid, score: _ } in result.iter().skip(offset) {
            chunks.push(index.get_chunk_by_uid(*uid).handle_error(500)?);
        }

        is_ranked = true;
        chunks
    }

    else {
        // No filter at all: walk the files in path order and stop as soon as
        // enough chunks for the requested page have been gathered.
        let mut processed_files = index.processed_files.iter().collect::<Vec<_>>();
        processed_files.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));

        // Deliberately not pre-sized from `limit`/`offset`: a caller-chosen
        // capacity would let a single request trigger a huge allocation.
        let mut chunk_uids = Vec::new();

        for (_, uid) in processed_files.iter() {
            chunk_uids.append(&mut index.get_chunks_of_file(**uid).handle_error(500)?);

            if chunk_uids.len() > fetch_count {
                break;
            }
        }

        let mut chunks = Vec::with_capacity(chunk_uids.len());

        for chunk_uid in chunk_uids.iter() {
            chunks.push(index.get_chunk_by_uid(*chunk_uid).handle_error(500)?);
        }

        chunks
    };

    // Keywords combined with a `uid`/`file` filter: rank only the filtered chunks.
    if !is_ranked && !keywords.is_empty() {
        let candidate_uids = chunks.iter().map(|chunk| chunk.uid).collect::<Vec<_>>();
        let result = index.run_tfidf_on(
            &candidate_uids,
            tokenized_keywords,
            fetch_count,
        ).handle_error(500)?;
        let mut ranked = Vec::with_capacity(limit.min(result.len().saturating_sub(offset)));

        for TfidfResult { id: uid, score: _ } in result.iter().skip(offset) {
            ranked.push(index.get_chunk_by_uid(*uid).handle_error(500)?);
        }

        chunks = ranked;
        is_ranked = true;
    }

    let chunks = if is_ranked {
        chunks
    } else {
        // Unranked results are ordered by `sortable_string()` and paginated here,
        // moving the chunks out instead of cloning slices of them.
        chunks.sort_by_cached_key(|chunk| chunk.sortable_string());
        chunks.into_iter().skip(offset).take(limit).collect::<Vec<_>>()
    };

    let chunks = chunks.into_iter().map(ChunkDetail::from).collect::<Vec<_>>();
    Ok(Box::new(json(&chunks)))
}