use crate::{Error, Result};
use probly_search::{score::bm25, Index, QueryResult};
use serde::{Deserialize, Serialize};
use sos_backend::AccessPoint;
use sos_core::{crypto::AccessKey, VaultId};
use sos_vault::{
secret::{Secret, SecretId, SecretMeta, SecretRef, SecretType},
SecretAccess, Summary, Vault,
};
use std::{
borrow::Cow,
collections::{btree_map::Values, BTreeMap, HashMap, HashSet},
sync::Arc,
};
use tokio::sync::RwLock;
use unicode_segmentation::UnicodeSegmentation;
use url::Url;
/// Compute the set of n-gram slices over the grapheme clusters of `s`.
///
/// Yields every window of `n` consecutive graphemes plus the shorter
/// trailing windows (at least two graphemes long) so the end of the
/// string is still indexed. Returns an empty set when `n` is zero.
#[doc(hidden)]
pub fn ngram_slice(s: &str, n: usize) -> HashSet<&str> {
    let mut items: HashSet<&str> = HashSet::new();
    if n == 0 {
        // A zero-width window would only produce empty slices.
        return items;
    }
    // Byte offset of the start of each grapheme cluster.
    let offsets: Vec<usize> =
        s.grapheme_indices(true).map(|(offset, _)| offset).collect();
    let total = offsets.len();
    for (index, &start) in offsets.iter().enumerate() {
        if index + n < total {
            // Full window: ends at the start of the grapheme `n` ahead.
            items.insert(&s[start..offsets[index + n]]);
        } else if index + 1 < total || n == 1 {
            // Trailing window: runs to the end of the string so the
            // final grapheme is included (the previous end-offset
            // computation truncated it). Single-grapheme leftovers are
            // skipped except when n == 1, where they are full 1-grams.
            items.insert(&s[start..]);
        }
    }
    items
}
/// Key for a document in the search index.
///
/// Ordered by lowercase label first (see `SearchIndex::prepare`,
/// which lowercases the label when building the key) so iterating
/// the documents map yields label order; the folder and secret
/// identifiers disambiguate duplicate labels.
#[derive(Debug, Clone, Eq, Ord, PartialEq, PartialOrd, Serialize)]
pub struct DocumentKey(String, VaultId, SecretId);
fn tokenizer(s: &str) -> Vec<Cow<'_, str>> {
let words = s.split(' ').collect::<HashSet<_>>();
let ngram2 = ngram_slice(s, 2);
let ngram3 = ngram_slice(s, 3);
let ngram4 = ngram_slice(s, 4);
let ngram5 = ngram_slice(s, 5);
let ngram: HashSet<&str> = ngram2.union(&ngram3).map(|s| &**s).collect();
let ngram: HashSet<&str> = ngram.union(&ngram4).map(|s| &**s).collect();
let ngram: HashSet<&str> = ngram.union(&ngram5).map(|s| &**s).collect();
let mut tokens: Vec<Cow<str>> = Vec::new();
for token in ngram.union(&words) {
tokens.push(Cow::Owned(token.to_lowercase()))
}
tokens
}
/// Query-time tokenizer: lowercased space-delimited words only
/// (queries are not expanded into n-grams).
fn query_tokenizer(s: &str) -> Vec<Cow<'_, str>> {
    let mut tokens = Vec::new();
    for word in s.split(' ') {
        tokens.push(Cow::Owned(word.to_lowercase()));
    }
    tokens
}
/// Field extractor: the document label.
fn label_extract(d: &Document) -> Vec<&str> {
    std::iter::once(d.meta().label()).collect()
}
/// Field extractor: the document tags.
fn tags_extract(d: &Document) -> Vec<&str> {
    d.meta().tags().iter().map(String::as_str).collect()
}
/// Field extractor: the document comment, or an empty string
/// when no comment is set.
fn comment_extract(d: &Document) -> Vec<&str> {
    vec![d.extra().comment().unwrap_or("")]
}
/// Field extractor: the document website URLs, empty when none.
fn website_extract(d: &Document) -> Vec<&str> {
    d.extra().websites().unwrap_or_default()
}
/// Count of documents in the search index.
#[derive(Default, Debug, Clone)]
pub struct DocumentCount {
    /// Number of documents per folder.
    vaults: HashMap<VaultId, usize>,
    /// Number of documents per secret kind; not incremented for
    /// documents in the archive folder (see `add`/`remove`).
    kinds: HashMap<u8, usize>,
    /// Number of documents per tag.
    tags: HashMap<String, usize>,
    /// Number of favorite documents.
    favorites: usize,
    /// Identifier of the archive folder, when one exists.
    archive: Option<VaultId>,
}
impl DocumentCount {
    /// Create a document count with an optional archive folder.
    pub fn new(archive: Option<VaultId>) -> Self {
        Self {
            vaults: Default::default(),
            kinds: Default::default(),
            tags: Default::default(),
            favorites: Default::default(),
            archive,
        }
    }

    /// Set the identifier of the archive folder.
    pub fn set_archive_id(&mut self, archive: Option<VaultId>) {
        self.archive = archive;
    }

    /// Number of documents per folder.
    pub fn vaults(&self) -> &HashMap<VaultId, usize> {
        &self.vaults
    }

    /// Number of documents per secret kind.
    pub fn kinds(&self) -> &HashMap<u8, usize> {
        &self.kinds
    }

    /// Number of documents per tag.
    pub fn tags(&self) -> &HashMap<String, usize> {
        &self.tags
    }

    /// Number of favorite documents.
    pub fn favorites(&self) -> usize {
        self.favorites
    }

    /// Whether a folder is the archive folder.
    fn is_archived(&self, folder_id: &VaultId) -> bool {
        self.archive.as_ref() == Some(folder_id)
    }

    /// Decrement a counter, never going below zero; a missing
    /// entry is recorded as zero, matching the previous behavior
    /// of the inline `entry().and_modify().or_insert(0)` blocks.
    fn decrement<K: std::hash::Hash + Eq>(
        map: &mut HashMap<K, usize>,
        key: K,
    ) {
        map.entry(key)
            .and_modify(|counter| *counter = counter.saturating_sub(1))
            .or_insert(0);
    }

    /// Remove a document from the count.
    ///
    /// When `options` is given, the kind, tag and favorite counters
    /// are updated as well; kind counters are not tracked for
    /// documents in the archive folder.
    fn remove(
        &mut self,
        folder_id: VaultId,
        options: Option<(u8, HashSet<String>, bool)>,
    ) {
        Self::decrement(&mut self.vaults, folder_id);
        if let Some((kind, tags, favorite)) = options {
            if !self.is_archived(&folder_id) {
                Self::decrement(&mut self.kinds, kind);
            }
            for tag in tags {
                Self::decrement(&mut self.tags, tag.clone());
                // Drop tag entries whose count reached zero so the
                // tags map only lists tags that are still in use.
                if self.tags.get(&tag).copied().unwrap_or(0) == 0 {
                    self.tags.remove(&tag);
                }
            }
            if favorite {
                self.favorites = self.favorites.saturating_sub(1);
            }
        }
    }

    /// Add a document to the count.
    fn add(
        &mut self,
        folder_id: VaultId,
        kind: u8,
        tags: &HashSet<String>,
        favorite: bool,
    ) {
        *self.vaults.entry(folder_id).or_insert(0) += 1;
        // Kinds are not counted for archived documents.
        if !self.is_archived(&folder_id) {
            *self.kinds.entry(kind).or_insert(0) += 1;
        }
        for tag in tags {
            *self.tags.entry(tag.clone()).or_insert(0) += 1;
        }
        if favorite {
            self.favorites += 1;
        }
    }
}
/// Statistics about the search index.
#[derive(Debug)]
pub struct IndexStatistics {
    /// Document counts per folder, kind, tag and favorite.
    count: DocumentCount,
}
impl IndexStatistics {
    /// Create index statistics with an optional archive folder.
    pub fn new(archive: Option<VaultId>) -> Self {
        Self {
            count: DocumentCount::new(archive),
        }
    }

    /// Set the identifier of the archive folder.
    pub fn set_archive_id(&mut self, archive: Option<VaultId>) {
        self.count.set_archive_id(archive);
    }

    /// Document counts.
    pub fn count(&self) -> &DocumentCount {
        &self.count
    }
}
/// Extra fields extracted from secret data that are safe to
/// expose in search results.
#[typeshare::typeshare]
#[derive(Default, Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ExtraFields {
    /// Optional comment taken from the secret's user data.
    pub comment: Option<String>,
    /// Contact kind when the secret is a vCard contact.
    pub contact_type: Option<vcard4::property::Kind>,
    /// Website URLs associated with the secret.
    pub websites: Option<Vec<String>>,
}
impl From<&Secret> for ExtraFields {
    /// Extract the searchable extra fields from a secret.
    fn from(value: &Secret) -> Self {
        // Contacts default to the `Individual` kind when the vCard
        // does not specify one; non-contact secrets have no kind.
        let contact_type = if let Secret::Contact { vcard, .. } = value {
            vcard
                .kind
                .as_ref()
                .map(|p| p.value.clone())
                .or(Some(vcard4::property::Kind::Individual))
        } else {
            None
        };
        ExtraFields {
            comment: value.user_data().comment().map(ToOwned::to_owned),
            contact_type,
            websites: value
                .websites()
                .map(|w| w.into_iter().map(|u| u.to_string()).collect()),
        }
    }
}
impl ExtraFields {
    /// Optional comment of the secret.
    pub fn comment(&self) -> Option<&str> {
        self.comment.as_deref()
    }

    /// Optional website URLs associated with the secret.
    pub fn websites(&self) -> Option<Vec<&str>> {
        self.websites
            .as_ref()
            .map(|urls| urls.iter().map(String::as_str).collect())
    }
}
/// Document in the search index.
#[typeshare::typeshare]
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Document {
    /// Identifier of the folder containing the secret.
    pub folder_id: VaultId,
    /// Identifier of the secret.
    pub secret_id: SecretId,
    /// Metadata of the secret.
    pub meta: SecretMeta,
    /// Extra searchable fields extracted from the secret data.
    pub extra: ExtraFields,
}
impl Document {
    /// Identifier of the folder containing the secret.
    pub fn folder_id(&self) -> &VaultId {
        &self.folder_id
    }

    /// Identifier of the secret.
    pub fn id(&self) -> &SecretId {
        &self.secret_id
    }

    /// Metadata of the secret.
    pub fn meta(&self) -> &SecretMeta {
        &self.meta
    }

    /// Extra searchable fields.
    pub fn extra(&self) -> &ExtraFields {
        &self.extra
    }
}
/// Search index of secret documents.
pub struct SearchIndex {
    /// Full-text index keyed by folder and secret identifiers.
    index: Index<(VaultId, SecretId)>,
    /// Indexed documents, ordered by document key
    /// (lowercase label first).
    documents: BTreeMap<DocumentKey, Document>,
    /// Statistics about the indexed documents.
    statistics: IndexStatistics,
}
impl Default for SearchIndex {
    /// Equivalent to [`SearchIndex::new`].
    fn default() -> Self {
        Self::new()
    }
}
impl SearchIndex {
pub fn new() -> Self {
let index = Index::<(VaultId, SecretId)>::new(4);
Self {
index,
documents: Default::default(),
statistics: IndexStatistics::new(None),
}
}
pub fn set_archive_id(&mut self, archive: Option<VaultId>) {
self.statistics.set_archive_id(archive);
}
pub fn statistics(&self) -> &IndexStatistics {
&self.statistics
}
pub fn documents(&self) -> &BTreeMap<DocumentKey, Document> {
&self.documents
}
pub fn values(&self) -> Vec<&Document> {
self.documents.values().collect::<Vec<_>>()
}
pub fn values_iter(&self) -> Values<'_, DocumentKey, Document> {
self.documents.values()
}
pub fn len(&self) -> usize {
self.documents.len()
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn find_by_label<'a>(
&'a self,
folder_id: &VaultId,
label: &str,
id: Option<&SecretId>,
) -> Option<&'a Document> {
self.documents
.values()
.filter(|d| {
if let Some(id) = id {
id != d.id()
} else {
true
}
})
.find(|d| d.folder_id() == folder_id && d.meta().label() == label)
}
pub fn find_by_label_any<'a>(
&'a self,
label: &str,
id: Option<&SecretId>,
case_insensitive: bool,
) -> Option<&'a Document> {
self.documents
.values()
.filter(|d| {
if let Some(id) = id {
id != d.id()
} else {
true
}
})
.find(|d| {
if case_insensitive {
d.meta().label().to_lowercase() == label.to_lowercase()
} else {
d.meta().label() == label
}
})
}
pub fn find_all_by_label<'a>(
&'a self,
label: &str,
id: Option<&SecretId>,
) -> Vec<&'a Document> {
self.documents
.iter()
.filter(|(k, v)| {
if let Some(id) = id {
if id == &k.1 {
false
} else {
v.meta().label() == label
}
} else {
v.meta().label() == label
}
})
.map(|(_, v)| v)
.collect::<Vec<_>>()
}
pub fn find_by_id<'a>(
&'a self,
folder_id: &VaultId,
id: &SecretId,
) -> Option<&'a Document> {
self.documents
.values()
.find(|d| d.folder_id() == folder_id && d.id() == id)
}
pub fn find_by_uuid_or_label<'a>(
&'a self,
folder_id: &VaultId,
target: &SecretRef,
) -> Option<&'a Document> {
match target {
SecretRef::Id(id) => self.find_by_id(folder_id, id),
SecretRef::Name(name) => {
self.find_by_label(folder_id, name, None)
}
}
}
pub fn prepare(
&self,
folder_id: &VaultId,
id: &SecretId,
meta: &SecretMeta,
secret: &Secret,
) -> Option<(DocumentKey, Document)> {
if self.find_by_id(folder_id, id).is_none() {
let doc = Document {
folder_id: *folder_id,
secret_id: *id,
meta: meta.clone(),
extra: secret.into(),
};
let key = DocumentKey(
doc.meta().label().to_lowercase(),
*folder_id,
*id,
);
Some((key, doc))
} else {
None
}
}
pub fn commit(&mut self, doc: Option<(DocumentKey, Document)>) {
if let Some((key, doc)) = doc {
let exists = self.documents.get(&key).is_some();
let doc = self.documents.entry(key).or_insert(doc);
if !exists {
self.index.add_document(
&[
label_extract,
tags_extract,
comment_extract,
website_extract,
],
tokenizer,
(doc.folder_id, doc.secret_id),
doc,
);
self.statistics.count.add(
doc.folder_id,
doc.meta().kind().into(),
doc.meta().tags(),
doc.meta().favorite(),
);
}
}
}
pub fn add(
&mut self,
folder_id: &VaultId,
id: &SecretId,
meta: &SecretMeta,
secret: &Secret,
) {
self.commit(self.prepare(folder_id, id, meta, secret));
}
pub fn update(
&mut self,
folder_id: &VaultId,
id: &SecretId,
meta: &SecretMeta,
secret: &Secret,
) {
self.remove(folder_id, id);
self.add(folder_id, id, meta, secret);
}
pub async fn add_folder(&mut self, folder: &AccessPoint) -> Result<()> {
let vault = folder.vault();
for id in vault.keys() {
let (meta, secret, _) = folder
.read_secret(id)
.await?
.ok_or_else(|| Error::NoSecretId(*folder.id(), *id))?;
self.add(folder.id(), id, &meta, &secret);
}
Ok(())
}
pub async fn remove_folder(
&mut self,
folder: &AccessPoint,
) -> Result<()> {
let vault = folder.vault();
for id in vault.keys() {
self.remove(folder.id(), id);
}
Ok(())
}
pub fn remove(&mut self, folder_id: &VaultId, id: &SecretId) {
let key = self
.documents
.keys()
.find(|key| &key.1 == folder_id && &key.2 == id)
.cloned();
let doc_info = if let Some(key) = &key {
let doc = self.documents.remove(key);
doc.map(|doc| {
let kind: u8 = doc.meta().kind().into();
(kind, doc.meta().tags().clone(), doc.meta().favorite())
})
} else {
None
};
self.index.remove_document((*folder_id, *id));
self.index.vacuum();
self.statistics.count.remove(*folder_id, doc_info);
}
pub fn remove_vault(&mut self, folder_id: &VaultId) {
let keys: Vec<DocumentKey> = self
.documents
.keys()
.filter(|k| &k.1 == folder_id)
.cloned()
.collect();
for key in keys {
self.remove(&key.1, &key.2);
self.documents.remove(&key);
}
}
pub fn remove_all(&mut self) {
let keys: Vec<DocumentKey> = self.documents.keys().cloned().collect();
for key in keys {
self.remove(&key.1, &key.2);
self.documents.remove(&key);
}
}
pub fn query(
&self,
needle: &str,
) -> Vec<QueryResult<(VaultId, SecretId)>> {
self.index.query(
needle,
&mut bm25::new(),
query_tokenizer,
&[1., 1., 1., 1.],
)
}
pub fn query_map(
&self,
needle: &str,
predicate: impl Fn(&Document) -> bool,
) -> Vec<&Document> {
let results = self.query(needle);
results
.into_iter()
.filter_map(|r| {
self.find_by_id(&r.key.0, &r.key.1)
.filter(|&doc| predicate(doc))
})
.collect::<Vec<_>>()
}
}
/// Statistics about the documents in an account.
#[derive(Default, Debug, Serialize, Deserialize)]
pub struct AccountStatistics {
    /// Total number of documents.
    pub documents: usize,
    /// Number of documents per folder summary.
    pub folders: Vec<(Summary, usize)>,
    /// Number of documents per tag.
    pub tags: HashMap<String, usize>,
    /// Number of documents per secret type.
    pub types: HashMap<SecretType, usize>,
    /// Number of favorite documents.
    pub favorites: usize,
}
/// Account-level search index shared behind a read-write lock.
pub struct AccountSearch {
    /// Shared search index.
    pub search_index: Arc<RwLock<SearchIndex>>,
}
impl Default for AccountSearch {
    /// Equivalent to [`AccountSearch::new`].
    fn default() -> Self {
        Self::new()
    }
}
impl AccountSearch {
    /// Create an empty account search index.
    pub fn new() -> Self {
        Self {
            search_index: Arc::new(RwLock::new(SearchIndex::new())),
        }
    }

    /// Clone of the shared search index handle.
    #[doc(hidden)]
    pub fn search(&self) -> Arc<RwLock<SearchIndex>> {
        Arc::clone(&self.search_index)
    }

    /// Remove every document from the search index.
    pub async fn clear(&mut self) {
        let mut writer = self.search_index.write().await;
        writer.remove_all();
    }

    /// Add all secrets of a folder to the search index.
    pub async fn add_folder(&self, folder: &AccessPoint) -> Result<()> {
        let mut index = self.search_index.write().await;
        index.add_folder(folder).await
    }

    /// Remove all documents of a folder from the search index.
    pub async fn remove_folder(&self, folder_id: &VaultId) {
        let mut writer = self.search_index.write().await;
        writer.remove_vault(folder_id);
    }

    /// Unlock a vault with the given key, index its secrets and
    /// lock it again.
    pub async fn add_vault(
        &self,
        vault: Vault,
        key: &AccessKey,
    ) -> Result<()> {
        let mut index = self.search_index.write().await;
        let mut keeper = AccessPoint::from_vault(vault);
        keeper.unlock(key).await?;
        index.add_folder(&keeper).await?;
        keeper.lock();
        Ok(())
    }

    /// Snapshot of the current document counts.
    pub async fn document_count(&self) -> DocumentCount {
        let reader = self.search_index.read().await;
        reader.statistics().count().clone()
    }

    /// Whether a document with the given label exists in a folder,
    /// optionally excluding a document by identifier.
    pub async fn document_exists(
        &self,
        folder_id: &VaultId,
        label: &str,
        id: Option<&SecretId>,
    ) -> bool {
        let reader = self.search_index.read().await;
        reader.find_by_label(folder_id, label, id).is_some()
    }

    /// Query the search index for documents matching any of the
    /// given views, subject to an optional archive filter.
    pub async fn query_view(
        &self,
        views: &[DocumentView],
        archive: Option<&ArchiveFilter>,
    ) -> Result<Vec<Document>> {
        let index_reader = self.search_index.read().await;
        let mut docs = Vec::with_capacity(index_reader.len());
        for doc in index_reader.values_iter() {
            // Push at most once per document so one matching
            // several views is not duplicated in the results.
            if views.iter().any(|view| view.test(doc, archive)) {
                docs.push(doc.clone());
            }
        }
        Ok(docs)
    }

    /// Query the search index applying a filter to the matching
    /// documents; an empty query string matches every document.
    pub async fn query_map(
        &self,
        query: &str,
        filter: QueryFilter,
    ) -> Result<Vec<Document>> {
        let index_reader = self.search_index.read().await;
        let mut docs = Vec::new();
        let tags: HashSet<_> = filter.tags.iter().cloned().collect();
        let predicate = self.query_predicate(filter, tags);
        if !query.is_empty() {
            for doc in index_reader.query_map(query, predicate) {
                docs.push(doc.clone());
            }
        } else {
            for doc in index_reader.values_iter() {
                if predicate(doc) {
                    docs.push(doc.clone());
                }
            }
        }
        Ok(docs)
    }

    /// Build the predicate used by [`query_map`](Self::query_map);
    /// an empty filter dimension matches every document.
    fn query_predicate(
        &self,
        filter: QueryFilter,
        tags: HashSet<String>,
    ) -> impl Fn(&Document) -> bool {
        move |doc| {
            // Avoid allocating intersection sets per document;
            // any shared tag is a match.
            let tag_match = filter.tags.is_empty()
                || tags.iter().any(|tag| doc.meta().tags().contains(tag));
            let folder_id = doc.folder_id();
            let folder_match = filter.folders.is_empty()
                || filter.folders.contains(folder_id);
            let type_match = filter.types.is_empty()
                || filter.types.contains(doc.meta().kind());
            tag_match && folder_match && type_match
        }
    }
}
/// View of the documents in the search index.
#[typeshare::typeshare]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", tag = "kind", content = "body")]
pub enum DocumentView {
    /// All documents, optionally ignoring some secret types.
    All {
        /// Secret types to exclude; nothing is excluded when `None`.
        #[serde(rename = "ignoredTypes")]
        ignored_types: Option<Vec<SecretType>>,
    },
    /// Documents in a single folder.
    Vault(VaultId),
    /// Documents of a single secret type.
    TypeId(SecretType),
    /// Documents marked as favorites.
    Favorites,
    /// Documents carrying any of the given tags.
    Tags(Vec<String>),
    /// Contact documents, optionally restricted to certain
    /// contact kinds.
    Contact {
        /// Contact kinds to include; all contacts when `None`.
        include_types: Option<Vec<vcard4::property::Kind>>,
    },
    /// Specific documents in a folder.
    Documents {
        /// Identifier of the folder.
        #[serde(rename = "folderId")]
        folder_id: VaultId,
        /// Identifiers of the documents.
        identifiers: Vec<SecretId>,
    },
    /// Documents with associated websites, optionally matching
    /// a list of URLs.
    Websites {
        /// URLs to match; any document with websites matches
        /// when `None`.
        matches: Option<Vec<Url>>,
        /// Compare entire URLs when `true`, otherwise only
        /// the URL origins.
        exact: bool,
    },
}
impl Default for DocumentView {
    /// The default view includes all documents of every type.
    fn default() -> Self {
        Self::All {
            ignored_types: None,
        }
    }
}
impl DocumentView {
pub fn test(
&self,
doc: &Document,
archive: Option<&ArchiveFilter>,
) -> bool {
if let Some(filter) = archive {
if !filter.include_documents && doc.folder_id() == &filter.id {
return false;
}
}
match self {
DocumentView::All { ignored_types } => {
if let Some(ignored_types) = ignored_types {
return !ignored_types.contains(doc.meta().kind());
}
true
}
DocumentView::Vault(folder_id) => doc.folder_id() == folder_id,
DocumentView::TypeId(type_id) => doc.meta().kind() == type_id,
DocumentView::Favorites => doc.meta().favorite(),
DocumentView::Tags(tags) => {
let tags: HashSet<_> = tags.iter().cloned().collect();
!tags
.intersection(doc.meta().tags())
.collect::<HashSet<_>>()
.is_empty()
}
DocumentView::Contact { include_types } => {
if doc.meta().kind() == &SecretType::Contact {
if let Some(include_types) = include_types {
if let Some(contact_type) = &doc.extra().contact_type
{
let contact_type: vcard4::property::Kind =
contact_type.clone();
return include_types.contains(&contact_type);
} else {
return false;
}
}
return true;
}
false
}
DocumentView::Documents {
folder_id,
identifiers,
} => {
doc.folder_id() == folder_id && identifiers.contains(doc.id())
}
DocumentView::Websites { matches, exact } => {
if let Some(sites) = doc.extra().websites() {
if sites.is_empty() {
false
} else {
if let Some(targets) = matches {
let mut urls: Vec<Url> =
Vec::with_capacity(sites.len());
for site in sites {
match site.parse() {
Ok(url) => urls.push(url),
Err(e) => {
tracing::warn!(
error = %e,
"search::url_parse");
}
}
}
if *exact {
for url in targets {
if urls.contains(url) {
return true;
}
}
false
} else {
for url in targets {
for site in &urls {
if url.origin() == site.origin() {
return true;
}
}
}
false
}
} else {
true
}
}
} else {
false
}
}
}
}
}
/// Filter applied to query results.
///
/// An empty list in any dimension matches every document
/// (see `AccountSearch::query_predicate`).
#[typeshare::typeshare]
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
pub struct QueryFilter {
    /// Match documents carrying any of these tags.
    pub tags: Vec<String>,
    /// Match documents in any of these folders.
    pub folders: Vec<VaultId>,
    /// Match documents of any of these secret types.
    pub types: Vec<SecretType>,
}
/// Filter controlling whether archived documents are included
/// in view and query results.
#[typeshare::typeshare]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ArchiveFilter {
    /// Identifier of the archive folder.
    pub id: VaultId,
    /// Whether documents in the archive folder are included.
    pub include_documents: bool,
}