pub use crate::output::SyntheticFile;
use anyhow::{bail, Result};
use rust_patch::Patch;
use std::{collections::BTreeMap, path::PathBuf, sync::Arc};
use crate::{
fossick::{parser::DomParserResult, Fossicker},
options::PagefindServiceConfig,
PagefindInboundConfig, SearchOptions, SearchState,
};
/// Summary returned to the caller after a single page or record has been indexed.
///
/// All fields are plain standard-library types, so the response can be cloned
/// and compared freely (handy for callers that cache results or assert on them
/// in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexedFileResponse {
    /// Word count recorded for the indexed page.
    pub page_word_count: u32,
    /// URL recorded for the indexed page.
    pub page_url: String,
    /// Metadata key/value pairs recorded for the indexed page.
    pub page_meta: BTreeMap<String, String>,
}
/// Handle to a search index that pages and records can be added to, and from
/// which the resulting index files can be built, written to disk, or retrieved.
pub struct PagefindIndex {
// The indexing state lives behind an `Arc` + async mutex so that a clone of the
// handle can be moved into `spawn_blocking` closures (which lock it with
// `blocking_lock`) while async methods lock it with `.lock().await`.
search_index: Arc<tokio::sync::Mutex<SearchState>>,
}
impl PagefindIndex {
pub fn new(config: Option<PagefindServiceConfig>) -> Result<Self> {
let mut service_options: PagefindInboundConfig =
serde_json::from_str("{}").expect("All fields have serde defaults");
service_options.service = true;
if let Some(config) = config {
service_options = config.apply(service_options);
}
let options = SearchOptions::load(service_options)?;
Ok(Self {
search_index: Arc::new(tokio::sync::Mutex::new(SearchState::new(options))),
})
}
pub async fn add_html_file(
&mut self,
source_path: Option<String>,
url: Option<String>,
content: String,
) -> Result<IndexedFileResponse> {
if source_path.is_none() && url.is_none() {
bail!("Either source_path or url must be provided");
}
let file = Fossicker::new_synthetic(source_path.map(PathBuf::from), url, content);
let state = Arc::clone(&self.search_index);
let data = tokio::task::spawn_blocking(move || {
state.blocking_lock().fossick_one(file)
})
.await
.map_err(|e| anyhow::anyhow!("Task join error: {e}"))??;
Ok(IndexedFileResponse {
page_word_count: data.fragment.data.word_count as u32,
page_url: data.fragment.data.url,
page_meta: data.fragment.data.meta,
})
}
pub async fn add_custom_record(
&mut self,
url: String,
content: String,
language: String,
meta: Option<BTreeMap<String, String>>,
filters: Option<BTreeMap<String, Vec<String>>>,
sort: Option<BTreeMap<String, String>>,
) -> Result<IndexedFileResponse> {
let force_language = self
.search_index
.lock()
.await
.options
.force_language
.clone();
let data = DomParserResult {
digest: content,
filters: filters.unwrap_or_default(),
sort: sort.unwrap_or_default(),
meta: meta.unwrap_or_default(),
anchor_content: BTreeMap::new(),
has_custom_body: false,
force_inclusion: true,
has_html_element: true,
has_old_bundle_reference: false,
has_default_ui_reference: false,
language: force_language.unwrap_or(language),
};
let file = Fossicker::new_with_data(url, data);
let state = Arc::clone(&self.search_index);
let data = tokio::task::spawn_blocking(move || {
state.blocking_lock().fossick_one(file)
})
.await
.map_err(|e| anyhow::anyhow!("Task join error: {e}"))??;
Ok(IndexedFileResponse {
page_word_count: data.fragment.data.word_count as u32,
page_url: data.fragment.data.url,
page_meta: data.fragment.data.meta,
})
}
pub async fn add_directory(&mut self, path: String, glob: Option<String>) -> Result<usize> {
let defaults: PagefindInboundConfig =
serde_json::from_str("{}").expect("All fields have serde defaults");
let glob = glob.unwrap_or(defaults.glob);
let state = Arc::clone(&self.search_index);
let page_count = tokio::task::spawn_blocking(move || {
state.blocking_lock().fossick_many(PathBuf::from(path), glob)
})
.await
.map_err(|e| anyhow::anyhow!("Task join error: {e}"))??;
Ok(page_count)
}
pub async fn build_indexes(&mut self) -> Result<()> {
self.search_index.lock().await.build_indexes().await
}
pub async fn write_files(&mut self, output_path: Option<String>) -> Result<String> {
let mut state = self.search_index.lock().await;
state.build_indexes().await?;
let resolved_output_path = state.write_files(output_path.map(Into::into)).await;
Ok(resolved_output_path.to_string_lossy().into())
}
pub async fn get_files(&mut self) -> Result<Vec<SyntheticFile>> {
let mut state = self.search_index.lock().await;
state.build_indexes().await?;
Ok(state.get_files().await)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Adds one HTML page to a fresh index and checks both the per-page
    /// response and the set of files the index produces.
    #[tokio::test]
    async fn test_add_file() {
        let service_config = PagefindServiceConfig::builder()
            .keep_index_url(true)
            .force_language("en".to_string())
            .build();
        let mut index = PagefindIndex::new(Some(service_config)).unwrap();

        let added = index
            .add_html_file(
                Some("test/index.html".into()),
                None,
                "<html><body><h1>Test content</h1></body></html>".into(),
            )
            .await
            .expect("Adding a file should succeed");
        assert_eq!(added.page_word_count, 2);
        assert_eq!(added.page_url, "/test/index.html");

        let filenames: Vec<PathBuf> = index
            .get_files()
            .await
            .expect("Getting files should succeed")
            .into_iter()
            .map(|file| file.filename)
            .collect();

        // Files that must be present under an exact name.
        for expected in [
            "pagefind.js",
            "pagefind-ui.js",
            "pagefind-ui.css",
            "wasm.en.pagefind",
            "pagefind-entry.json",
        ] {
            assert!(filenames.contains(&PathBuf::from(expected)));
        }

        // Files whose names are not fixed; only their extension is checked.
        for suffix in [".pf_meta", ".pf_fragment", ".pf_index"] {
            assert!(filenames
                .iter()
                .any(|name| name.to_string_lossy().ends_with(suffix)));
        }
    }
}