Skip to main content

hashtree_cli/socialgraph/
mod.rs

1pub mod access;
2pub mod crawler;
3pub mod local_lists;
4pub mod snapshot;
5
6pub use access::SocialGraphAccessControl;
7pub use crawler::SocialGraphCrawler;
8pub use local_lists::{
9    read_local_list_file_state, sync_local_list_files_force, sync_local_list_files_if_changed,
10    LocalListFileState, LocalListSyncOutcome,
11};
12
13mod index_buckets;
14
15use index_buckets::{
16    dedupe_events, latest_metadata_events_by_pubkey, EventIndexBucket, ProfileIndexBucket,
17};
18
19use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
20use std::path::{Path, PathBuf};
21use std::sync::{Arc, Mutex as StdMutex};
22
23use anyhow::{Context, Result};
24use bytes::Bytes;
25use futures::executor::block_on;
26use hashtree_core::{nhash_encode_full, Cid, HashTree, HashTreeConfig, NHashData};
27use hashtree_index::BTree;
28use hashtree_nostr::{
29    is_parameterized_replaceable_kind, is_replaceable_kind, ListEventsOptions, NostrEventStore,
30    NostrEventStoreError, ProfileGuard as NostrProfileGuard, StoredNostrEvent,
31};
32#[cfg(test)]
33use hashtree_nostr::{
34    reset_profile as reset_nostr_profile, set_profile_enabled as set_nostr_profile_enabled,
35    take_profile as take_nostr_profile,
36};
37use nostr::{Event, Filter, JsonUtil, Kind, SingleLetterTag};
38use nostr_social_graph::{
39    BinaryBudget, GraphStats, NostrEvent as GraphEvent, SocialGraph,
40    SocialGraphBackend as NostrSocialGraphBackend,
41};
42use nostr_social_graph_heed::HeedSocialGraph;
43
44use crate::storage::{LocalStore, StorageRouter};
45
46#[cfg(test)]
47use std::sync::{Mutex, MutexGuard, OnceLock};
48#[cfg(test)]
49use std::time::Instant;
50
/// Ordered set of 32-byte public keys, e.g. the result of
/// [`SocialGraphBackend::followed_targets`].
pub type UserSet = BTreeSet<[u8; 32]>;
52
/// All-zero placeholder root (hex). The graph is opened with it, and
/// `default_storage_class_for` never treats it as a real root author.
const DEFAULT_ROOT_HEX: &str = "0000000000000000000000000000000000000000000000000000000000000000";
/// File (inside the db directory) holding the root of the public event bucket.
const EVENTS_ROOT_FILE: &str = "events-root.msgpack";
/// File (inside the db directory) holding the root of the ambient event bucket.
const AMBIENT_EVENTS_ROOT_FILE: &str = "events-root-ambient.msgpack";
/// Sub-directory of the db directory used for the ambient blob store.
const AMBIENT_EVENTS_BLOB_DIR: &str = "ambient-blobs";
/// File (inside the db directory) holding the profile search index root.
const PROFILE_SEARCH_ROOT_FILE: &str = "profile-search-root.msgpack";
/// File (inside the db directory) holding the profiles-by-pubkey root.
const PROFILES_BY_PUBKEY_ROOT_FILE: &str = "profiles-by-pubkey-root.msgpack";
/// Distance value that `SocialGraphStore::follow_distance` maps to `None`.
const UNKNOWN_FOLLOW_DISTANCE: u32 = 1000;
/// 64 MiB. NOTE(review): not referenced in the visible part of this file;
/// presumably the fallback map size when none is supplied — confirm.
const DEFAULT_SOCIALGRAPH_MAP_SIZE_BYTES: u64 = 64 * 1024 * 1024;
/// NOTE(review): not referenced in the visible part of this file; presumably
/// the maximum number of named databases in the graph environment — confirm.
const SOCIALGRAPH_MAX_DBS: u32 = 16;
/// `order` option passed to the profile search `BTree`.
const PROFILE_SEARCH_INDEX_ORDER: usize = 64;
/// NOTE(review): not referenced in the visible part of this file; presumably
/// the key prefix for profile search entries — confirm.
const PROFILE_SEARCH_PREFIX: &str = "p:";
/// NOTE(review): not referenced in the visible part of this file; presumably
/// an upper bound on indexed profile name length — confirm.
const PROFILE_NAME_MAX_LENGTH: usize = 100;
65
/// Selects which event bucket of a [`SocialGraphStore`] an event is stored in.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EventStorageClass {
    /// Stored in the store's `public_events` bucket.
    Public,
    /// Stored in the store's `ambient_events` bucket.
    Ambient,
}
71
/// Selects which event buckets `SocialGraphStore::query_events_in_scope` reads.
// Dead-code warnings are silenced outside of test builds.
#[cfg_attr(not(test), allow(dead_code))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum EventQueryScope {
    /// Only the public bucket.
    PublicOnly,
    /// Only the ambient bucket.
    AmbientOnly,
    /// Public bucket first, then the ambient bucket.
    All,
}
79
/// Serializable pair of a 32-byte hash and an optional 32-byte key.
///
/// NOTE(review): presumably the on-disk form of a `Cid` written to the
/// `*-root.msgpack` files — confirm against the code that reads/writes it.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
struct StoredCid {
    hash: [u8; 32],
    key: Option<[u8; 32]>,
}
85
/// Serializable profile search entry, as returned by
/// `SocialGraphStore::profile_search_entries_for_prefix`.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct StoredProfileSearchEntry {
    pub pubkey: String,
    pub name: String,
    /// Defaults to an empty list when absent from the serialized form.
    #[serde(default)]
    pub aliases: Vec<String>,
    /// Defaults to `None` when absent from the serialized form.
    #[serde(default)]
    pub nip05: Option<String>,
    pub created_at: u64,
    pub event_nhash: String,
}
97
/// Aggregate statistics about the social graph.
///
/// The field descriptions below reflect how `SocialGraphStore` fills the
/// struct; other backends may populate it differently.
#[derive(Debug, Clone, Default, serde::Serialize)]
pub struct SocialGraphStats {
    /// Sum of all per-distance user counts in `size_by_distance`.
    pub total_users: usize,
    /// Root of the exported graph state, if any.
    pub root: Option<String>,
    /// Sum of the follow-target counts over all users in the exported state.
    pub total_follows: usize,
    /// Largest follow distance present in `size_by_distance` (0 when empty).
    pub max_depth: u32,
    /// Number of resolved users at each follow distance.
    pub size_by_distance: BTreeMap<u32, usize>,
    /// Set to `true` by `SocialGraphStore`; `false` in the `Default` value.
    pub enabled: bool,
}
107
/// Snapshot derived from an exported graph state; kept by `SocialGraphStore`
/// until the graph is modified.
#[derive(Debug, Clone)]
struct DistanceCache {
    /// Statistics computed from the exported state.
    stats: SocialGraphStats,
    /// Decoded public keys grouped by follow distance.
    users_by_distance: BTreeMap<u32, Vec<[u8; 32]>>,
}
113
/// Error wrapping a plain message; its `Display` output is that message.
#[derive(Debug, thiserror::Error)]
#[error("{0}")]
pub struct UpstreamGraphBackendError(String);
117
/// Social graph backed by `HeedSocialGraph`, together with public/ambient
/// event buckets and a profile index.
pub struct SocialGraphStore {
    /// The follow/mute graph; every access goes through this mutex.
    graph: StdMutex<HeedSocialGraph>,
    /// Lazily built distance/statistics snapshot; reset to `None` whenever
    /// the graph is modified.
    distance_cache: StdMutex<Option<DistanceCache>>,
    /// Bucket used for `EventStorageClass::Public` events.
    public_events: EventIndexBucket,
    /// Bucket used for `EventStorageClass::Ambient` events.
    ambient_events: EventIndexBucket,
    /// Profile lookup and search index.
    profile_index: ProfileIndexBucket,
    /// Overmute threshold applied when maintaining the profile index
    /// (initialised to `1.0` when the store is opened).
    profile_index_overmute_threshold: StdMutex<f64>,
}
126
127pub trait SocialGraphBackend: Send + Sync {
128    fn stats(&self) -> Result<SocialGraphStats>;
129    fn users_by_follow_distance(&self, distance: u32) -> Result<Vec<[u8; 32]>>;
130    fn follow_distance(&self, pk_bytes: &[u8; 32]) -> Result<Option<u32>>;
131    fn follow_list_created_at(&self, owner: &[u8; 32]) -> Result<Option<u64>>;
132    fn followed_targets(&self, owner: &[u8; 32]) -> Result<UserSet>;
133    fn is_overmuted_user(&self, user_pk: &[u8; 32], threshold: f64) -> Result<bool>;
134    fn profile_search_root(&self) -> Result<Option<Cid>> {
135        Ok(None)
136    }
137    fn snapshot_chunks(&self, root: &[u8; 32], options: &BinaryBudget) -> Result<Vec<Bytes>>;
138    fn ingest_event(&self, event: &Event) -> Result<()>;
139    fn ingest_event_with_storage_class(
140        &self,
141        event: &Event,
142        storage_class: EventStorageClass,
143    ) -> Result<()> {
144        let _ = storage_class;
145        self.ingest_event(event)
146    }
147    fn ingest_events(&self, events: &[Event]) -> Result<()> {
148        for event in events {
149            self.ingest_event(event)?;
150        }
151        Ok(())
152    }
153    fn ingest_events_with_storage_class(
154        &self,
155        events: &[Event],
156        storage_class: EventStorageClass,
157    ) -> Result<()> {
158        for event in events {
159            self.ingest_event_with_storage_class(event, storage_class)?;
160        }
161        Ok(())
162    }
163    fn ingest_graph_events(&self, events: &[Event]) -> Result<()> {
164        self.ingest_events(events)
165    }
166    fn query_events(&self, filter: &Filter, limit: usize) -> Result<Vec<Event>>;
167}
168
/// Guard returned by [`test_lock`]; the lock is released when it is dropped.
#[cfg(test)]
pub type TestLockGuard = MutexGuard<'static, ()>;

/// Process-wide mutex handed out by [`test_lock`], created on first use.
#[cfg(test)]
static NDB_TEST_LOCK: OnceLock<Mutex<()>> = OnceLock::new();

/// Acquires the global test lock, blocking until it is available.
///
/// The mutex protects no data (`()`), so a poisoned lock carries no broken
/// invariant. Poisoning is therefore ignored: a test that panics while
/// holding the guard must not make every later test fail on `lock()`.
#[cfg(test)]
pub fn test_lock() -> TestLockGuard {
    NDB_TEST_LOCK
        .get_or_init(|| Mutex::new(()))
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
}
179
/// Opens the social graph store under `data_dir` without an explicit map size.
///
/// Equivalent to [`open_social_graph_store_with_mapsize`] with
/// `mapsize_bytes` set to `None`.
pub fn open_social_graph_store(data_dir: &Path) -> Result<Arc<SocialGraphStore>> {
    open_social_graph_store_with_mapsize(data_dir, None)
}
183
184pub fn open_social_graph_store_with_mapsize(
185    data_dir: &Path,
186    mapsize_bytes: Option<u64>,
187) -> Result<Arc<SocialGraphStore>> {
188    let db_dir = data_dir.join("socialgraph");
189    open_social_graph_store_at_path(&db_dir, mapsize_bytes)
190}
191
192pub fn open_social_graph_store_with_storage(
193    data_dir: &Path,
194    store: Arc<StorageRouter>,
195    mapsize_bytes: Option<u64>,
196) -> Result<Arc<SocialGraphStore>> {
197    let db_dir = data_dir.join("socialgraph");
198    open_social_graph_store_at_path_with_storage(&db_dir, store, mapsize_bytes)
199}
200
201pub fn open_social_graph_store_at_path(
202    db_dir: &Path,
203    mapsize_bytes: Option<u64>,
204) -> Result<Arc<SocialGraphStore>> {
205    let config = hashtree_config::Config::load_or_default();
206    let backend = &config.storage.backend;
207    let local_store = Arc::new(
208        LocalStore::new_with_lmdb_map_size(db_dir.join("blobs"), backend, mapsize_bytes)
209            .map_err(|err| anyhow::anyhow!("Failed to create social graph blob store: {err}"))?,
210    );
211    let store = Arc::new(StorageRouter::new(local_store));
212    open_social_graph_store_at_path_with_storage(db_dir, store, mapsize_bytes)
213}
214
215pub fn open_social_graph_store_at_path_with_storage(
216    db_dir: &Path,
217    store: Arc<StorageRouter>,
218    mapsize_bytes: Option<u64>,
219) -> Result<Arc<SocialGraphStore>> {
220    let ambient_backend = store.local_store().backend();
221    let ambient_local = Arc::new(
222        LocalStore::new_with_lmdb_map_size(
223            db_dir.join(AMBIENT_EVENTS_BLOB_DIR),
224            &ambient_backend,
225            mapsize_bytes,
226        )
227        .map_err(|err| {
228            anyhow::anyhow!("Failed to create social graph ambient blob store: {err}")
229        })?,
230    );
231    let ambient_store = Arc::new(StorageRouter::new(ambient_local));
232    open_social_graph_store_at_path_with_storage_split(db_dir, store, ambient_store, mapsize_bytes)
233}
234
235pub fn open_social_graph_store_at_path_with_storage_split(
236    db_dir: &Path,
237    public_store: Arc<StorageRouter>,
238    ambient_store: Arc<StorageRouter>,
239    mapsize_bytes: Option<u64>,
240) -> Result<Arc<SocialGraphStore>> {
241    std::fs::create_dir_all(db_dir)?;
242    if let Some(size) = mapsize_bytes {
243        ensure_social_graph_mapsize(db_dir, size)?;
244    }
245    let graph = HeedSocialGraph::open(db_dir, DEFAULT_ROOT_HEX)
246        .context("open nostr-social-graph heed backend")?;
247
248    Ok(Arc::new(SocialGraphStore {
249        graph: StdMutex::new(graph),
250        distance_cache: StdMutex::new(None),
251        public_events: EventIndexBucket {
252            event_store: NostrEventStore::new(Arc::clone(&public_store)),
253            root_path: db_dir.join(EVENTS_ROOT_FILE),
254        },
255        ambient_events: EventIndexBucket {
256            event_store: NostrEventStore::new(ambient_store),
257            root_path: db_dir.join(AMBIENT_EVENTS_ROOT_FILE),
258        },
259        profile_index: ProfileIndexBucket {
260            tree: HashTree::new(HashTreeConfig::new(Arc::clone(&public_store))),
261            index: BTree::new(
262                public_store,
263                hashtree_index::BTreeOptions {
264                    order: Some(PROFILE_SEARCH_INDEX_ORDER),
265                },
266            ),
267            by_pubkey_root_path: db_dir.join(PROFILES_BY_PUBKEY_ROOT_FILE),
268            search_root_path: db_dir.join(PROFILE_SEARCH_ROOT_FILE),
269        },
270        profile_index_overmute_threshold: StdMutex::new(1.0),
271    }))
272}
273
274pub fn set_social_graph_root(store: &SocialGraphStore, pk_bytes: &[u8; 32]) {
275    if let Err(err) = store.set_root(pk_bytes) {
276        tracing::warn!("Failed to set social graph root: {err}");
277    }
278}
279
280pub fn get_follow_distance(
281    backend: &(impl SocialGraphBackend + ?Sized),
282    pk_bytes: &[u8; 32],
283) -> Option<u32> {
284    backend.follow_distance(pk_bytes).ok().flatten()
285}
286
287pub fn get_follows(
288    backend: &(impl SocialGraphBackend + ?Sized),
289    pk_bytes: &[u8; 32],
290) -> Vec<[u8; 32]> {
291    match backend.followed_targets(pk_bytes) {
292        Ok(set) => set.into_iter().collect(),
293        Err(_) => Vec::new(),
294    }
295}
296
297pub fn is_overmuted(
298    backend: &(impl SocialGraphBackend + ?Sized),
299    _root_pk: &[u8; 32],
300    user_pk: &[u8; 32],
301    threshold: f64,
302) -> bool {
303    backend
304        .is_overmuted_user(user_pk, threshold)
305        .unwrap_or(false)
306}
307
308pub fn ingest_event(backend: &(impl SocialGraphBackend + ?Sized), _sub_id: &str, event_json: &str) {
309    let event = match Event::from_json(event_json) {
310        Ok(event) => event,
311        Err(_) => return,
312    };
313
314    if let Err(err) = backend.ingest_event(&event) {
315        tracing::warn!("Failed to ingest social graph event: {err}");
316    }
317}
318
/// Ingests an already parsed `event` into `backend`, returning the backend's
/// result unchanged.
pub fn ingest_parsed_event(
    backend: &(impl SocialGraphBackend + ?Sized),
    event: &Event,
) -> Result<()> {
    backend.ingest_event(event)
}
325
/// Ingests an already parsed `event` into `backend` with an explicit
/// `storage_class`, returning the backend's result unchanged.
pub fn ingest_parsed_event_with_storage_class(
    backend: &(impl SocialGraphBackend + ?Sized),
    event: &Event,
    storage_class: EventStorageClass,
) -> Result<()> {
    backend.ingest_event_with_storage_class(event, storage_class)
}
333
/// Ingests a batch of already parsed `events` into `backend`, returning the
/// backend's result unchanged.
pub fn ingest_parsed_events(
    backend: &(impl SocialGraphBackend + ?Sized),
    events: &[Event],
) -> Result<()> {
    backend.ingest_events(events)
}
340
/// Ingests a batch of already parsed `events` into `backend` with an explicit
/// `storage_class`, returning the backend's result unchanged.
pub fn ingest_parsed_events_with_storage_class(
    backend: &(impl SocialGraphBackend + ?Sized),
    events: &[Event],
    storage_class: EventStorageClass,
) -> Result<()> {
    backend.ingest_events_with_storage_class(events, storage_class)
}
348
/// Forwards `events` to the backend's `ingest_graph_events`, returning its
/// result unchanged.
pub fn ingest_graph_parsed_events(
    backend: &(impl SocialGraphBackend + ?Sized),
    events: &[Event],
) -> Result<()> {
    backend.ingest_graph_events(events)
}
355
356pub fn query_events(
357    backend: &(impl SocialGraphBackend + ?Sized),
358    filter: &Filter,
359    limit: usize,
360) -> Vec<Event> {
361    backend.query_events(filter, limit).unwrap_or_default()
362}
363
364impl SocialGraphStore {
365    pub fn set_profile_index_overmute_threshold(&self, threshold: f64) {
366        *self
367            .profile_index_overmute_threshold
368            .lock()
369            .expect("profile index overmute threshold") = threshold;
370    }
371
372    fn profile_index_overmute_threshold(&self) -> f64 {
373        *self
374            .profile_index_overmute_threshold
375            .lock()
376            .expect("profile index overmute threshold")
377    }
378
379    fn invalidate_distance_cache(&self) {
380        *self.distance_cache.lock().unwrap() = None;
381    }
382
383    fn build_distance_cache(state: nostr_social_graph::SocialGraphState) -> Result<DistanceCache> {
384        let unique_ids = state
385            .unique_ids
386            .into_iter()
387            .map(|(pubkey, id)| decode_pubkey(&pubkey).map(|decoded| (id, decoded)))
388            .collect::<Result<HashMap<_, _>>>()?;
389
390        let mut users_by_distance = BTreeMap::new();
391        let mut size_by_distance = BTreeMap::new();
392        for (distance, users) in state.users_by_follow_distance {
393            let decoded = users
394                .into_iter()
395                .filter_map(|id| unique_ids.get(&id).copied())
396                .collect::<Vec<_>>();
397            size_by_distance.insert(distance, decoded.len());
398            users_by_distance.insert(distance, decoded);
399        }
400
401        let total_follows = state
402            .followed_by_user
403            .iter()
404            .map(|(_, targets)| targets.len())
405            .sum::<usize>();
406        let total_users = size_by_distance.values().copied().sum();
407        let max_depth = size_by_distance.keys().copied().max().unwrap_or_default();
408
409        Ok(DistanceCache {
410            stats: SocialGraphStats {
411                total_users,
412                root: Some(state.root),
413                total_follows,
414                max_depth,
415                size_by_distance,
416                enabled: true,
417            },
418            users_by_distance,
419        })
420    }
421
422    fn load_distance_cache(&self) -> Result<DistanceCache> {
423        if let Some(cache) = self.distance_cache.lock().unwrap().clone() {
424            return Ok(cache);
425        }
426
427        let state = {
428            let graph = self.graph.lock().unwrap();
429            graph.export_state().context("export social graph state")?
430        };
431        let cache = Self::build_distance_cache(state)?;
432        *self.distance_cache.lock().unwrap() = Some(cache.clone());
433        Ok(cache)
434    }
435
436    fn set_root(&self, root: &[u8; 32]) -> Result<()> {
437        let root_hex = hex::encode(root);
438        {
439            let mut graph = self.graph.lock().unwrap();
440            if should_replace_placeholder_root(&graph)? {
441                let fresh = SocialGraph::new(&root_hex);
442                graph
443                    .replace_state(&fresh.export_state())
444                    .context("replace placeholder social graph root")?;
445            } else {
446                graph
447                    .set_root(&root_hex)
448                    .context("set nostr-social-graph root")?;
449            }
450        }
451        self.invalidate_distance_cache();
452        Ok(())
453    }
454
455    fn stats(&self) -> Result<SocialGraphStats> {
456        Ok(self.load_distance_cache()?.stats)
457    }
458
459    fn follow_distance(&self, pk_bytes: &[u8; 32]) -> Result<Option<u32>> {
460        let graph = self.graph.lock().unwrap();
461        let distance = graph
462            .get_follow_distance(&hex::encode(pk_bytes))
463            .context("read social graph follow distance")?;
464        Ok((distance != UNKNOWN_FOLLOW_DISTANCE).then_some(distance))
465    }
466
467    fn users_by_follow_distance(&self, distance: u32) -> Result<Vec<[u8; 32]>> {
468        Ok(self
469            .load_distance_cache()?
470            .users_by_distance
471            .get(&distance)
472            .cloned()
473            .unwrap_or_default())
474    }
475
476    fn follow_list_created_at(&self, owner: &[u8; 32]) -> Result<Option<u64>> {
477        let graph = self.graph.lock().unwrap();
478        graph
479            .get_follow_list_created_at(&hex::encode(owner))
480            .context("read social graph follow list timestamp")
481    }
482
483    fn followed_targets(&self, owner: &[u8; 32]) -> Result<UserSet> {
484        let graph = self.graph.lock().unwrap();
485        decode_pubkey_set(
486            graph
487                .get_followed_by_user(&hex::encode(owner))
488                .context("read followed targets")?,
489        )
490    }
491
492    fn is_overmuted_user(&self, user_pk: &[u8; 32], threshold: f64) -> Result<bool> {
493        if threshold <= 0.0 {
494            return Ok(false);
495        }
496        let graph = self.graph.lock().unwrap();
497        graph
498            .is_overmuted(&hex::encode(user_pk), threshold)
499            .context("check social graph overmute")
500    }
501
    /// Current root of the profile search index, as reported by the profile
    /// index bucket.
    // Dead-code warnings are silenced outside of test builds.
    #[cfg_attr(not(test), allow(dead_code))]
    pub fn profile_search_root(&self) -> Result<Option<Cid>> {
        self.profile_index.search_root()
    }
506
    /// Current root of the public event bucket, if one has been written.
    pub fn public_events_root(&self) -> Result<Option<Cid>> {
        self.public_events.events_root()
    }
510
    /// Overwrites the stored root of the public event bucket with `root`.
    pub(crate) fn write_public_events_root(&self, root: Option<&Cid>) -> Result<()> {
        self.public_events.write_events_root(root)
    }
514
    /// Profile event the profile index holds for `pubkey_hex`, if any.
    // Dead-code warnings are silenced outside of test builds.
    #[cfg_attr(not(test), allow(dead_code))]
    pub fn latest_profile_event(&self, pubkey_hex: &str) -> Result<Option<Event>> {
        self.profile_index.profile_event_for_pubkey(pubkey_hex)
    }
519
    /// Profile search entries the profile index returns for `prefix`, each
    /// paired with a string produced by the index.
    ///
    /// NOTE(review): the string is presumably the index key — confirm in
    /// `index_buckets`.
    // Dead-code warnings are silenced outside of test builds.
    #[cfg_attr(not(test), allow(dead_code))]
    pub fn profile_search_entries_for_prefix(
        &self,
        prefix: &str,
    ) -> Result<Vec<(String, StoredProfileSearchEntry)>> {
        self.profile_index.search_entries_for_prefix(prefix)
    }
527
    /// Public entry point for incrementally updating the profile index from
    /// `events`; forwards to `update_profile_index_for_events`.
    pub fn sync_profile_index_for_events(&self, events: &[Event]) -> Result<()> {
        self.update_profile_index_for_events(events)
    }
531
532    pub(crate) fn rebuild_profile_index_for_events(&self, events: &[Event]) -> Result<()> {
533        let latest_by_pubkey = self.filtered_latest_metadata_events_by_pubkey(events)?;
534        let (by_pubkey_root, search_root) = self
535            .profile_index
536            .rebuild_profile_events(latest_by_pubkey.into_values())?;
537        self.profile_index
538            .write_by_pubkey_root(by_pubkey_root.as_ref())?;
539        self.profile_index.write_search_root(search_root.as_ref())?;
540        Ok(())
541    }
542
543    pub fn rebuild_profile_index_from_stored_events(&self) -> Result<usize> {
544        let public_events_root = self.public_events.events_root()?;
545        let ambient_events_root = self.ambient_events.events_root()?;
546        if public_events_root.is_none() && ambient_events_root.is_none() {
547            self.profile_index.write_by_pubkey_root(None)?;
548            self.profile_index.write_search_root(None)?;
549            return Ok(0);
550        }
551
552        let mut events = Vec::new();
553        for (bucket, root) in [
554            (&self.public_events, public_events_root),
555            (&self.ambient_events, ambient_events_root),
556        ] {
557            let Some(root) = root else {
558                continue;
559            };
560            let stored = block_on(bucket.event_store.list_by_kind_lossy(
561                Some(&root),
562                Kind::Metadata.as_u16() as u32,
563                ListEventsOptions::default(),
564            ))
565            .map_err(map_event_store_error)?;
566            events.extend(
567                stored
568                    .into_iter()
569                    .map(nostr_event_from_stored)
570                    .collect::<Result<Vec<_>>>()?,
571            );
572        }
573
574        let latest_count = self
575            .filtered_latest_metadata_events_by_pubkey(&events)?
576            .len();
577        self.rebuild_profile_index_for_events(&events)?;
578        Ok(latest_count)
579    }
580
581    pub fn rebuild_event_indexes_from_stored_events(&self) -> Result<(usize, usize)> {
582        let public_count =
583            self.rebuild_event_index_bucket_from_stored_events(&self.public_events)?;
584        let ambient_count =
585            self.rebuild_event_index_bucket_from_stored_events(&self.ambient_events)?;
586        self.rebuild_profile_index_from_stored_events()?;
587        Ok((public_count, ambient_count))
588    }
589
590    fn rebuild_event_index_bucket_from_stored_events(
591        &self,
592        bucket: &EventIndexBucket,
593    ) -> Result<usize> {
594        let Some(root) = bucket.events_root()? else {
595            bucket.write_events_root(None)?;
596            return Ok(0);
597        };
598
599        let manifest = block_on(bucket.event_store.get_manifest(Some(&root)))
600            .map_err(map_event_store_error)?;
601        if manifest.by_kind_time_author.is_none() {
602            let next_root = block_on(bucket.event_store.upgrade_manifest_indexes(Some(&root)))
603                .map_err(map_event_store_error)?;
604            if next_root.as_ref() != Some(&root) {
605                bucket.write_events_root(next_root.as_ref())?;
606                return Ok(0);
607            }
608        }
609
610        let stored = block_on(
611            bucket
612                .event_store
613                .list_recent_lossy(Some(&root), ListEventsOptions::default()),
614        )
615        .map_err(map_event_store_error)?;
616        let count = stored.len();
617        let next_root =
618            block_on(bucket.event_store.build(None, stored)).map_err(map_event_store_error)?;
619        bucket.write_events_root(next_root.as_ref())?;
620        Ok(count)
621    }
622
623    fn update_profile_index_for_events(&self, events: &[Event]) -> Result<()> {
624        let latest_by_pubkey = latest_metadata_events_by_pubkey(events);
625        let threshold = self.profile_index_overmute_threshold();
626
627        if latest_by_pubkey.is_empty() {
628            return Ok(());
629        }
630
631        let mut by_pubkey_root = self.profile_index.by_pubkey_root()?;
632        let mut search_root = self.profile_index.search_root()?;
633        let mut changed = false;
634
635        for event in latest_by_pubkey.into_values() {
636            let overmuted = self.is_overmuted_user(&event.pubkey.to_bytes(), threshold)?;
637            let (next_by_pubkey_root, next_search_root, updated) = if overmuted {
638                self.profile_index.remove_profile_event(
639                    by_pubkey_root.as_ref(),
640                    search_root.as_ref(),
641                    &event.pubkey.to_hex(),
642                )?
643            } else {
644                self.profile_index.update_profile_event(
645                    by_pubkey_root.as_ref(),
646                    search_root.as_ref(),
647                    event,
648                )?
649            };
650            if updated {
651                by_pubkey_root = next_by_pubkey_root;
652                search_root = next_search_root;
653                changed = true;
654            }
655        }
656
657        if changed {
658            self.profile_index
659                .write_by_pubkey_root(by_pubkey_root.as_ref())?;
660            self.profile_index.write_search_root(search_root.as_ref())?;
661        }
662
663        Ok(())
664    }
665
666    fn filtered_latest_metadata_events_by_pubkey<'a>(
667        &self,
668        events: &'a [Event],
669    ) -> Result<BTreeMap<String, &'a Event>> {
670        let threshold = self.profile_index_overmute_threshold();
671        let mut latest_by_pubkey = BTreeMap::<String, &Event>::new();
672        for event in events.iter().filter(|event| event.kind == Kind::Metadata) {
673            if self.is_overmuted_user(&event.pubkey.to_bytes(), threshold)? {
674                continue;
675            }
676            let pubkey = event.pubkey.to_hex();
677            match latest_by_pubkey.get(&pubkey) {
678                Some(current) if compare_nostr_events(event, current).is_le() => {}
679                _ => {
680                    latest_by_pubkey.insert(pubkey, event);
681                }
682            }
683        }
684        Ok(latest_by_pubkey)
685    }
686
687    fn snapshot_chunks(&self, root: &[u8; 32], options: &BinaryBudget) -> Result<Vec<Bytes>> {
688        let state = {
689            let graph = self.graph.lock().unwrap();
690            graph.export_state().context("export social graph state")?
691        };
692        let mut graph = SocialGraph::from_state(state).context("rebuild social graph state")?;
693        let root_hex = hex::encode(root);
694        if graph.get_root() != root_hex {
695            graph
696                .set_root(&root_hex)
697                .context("set snapshot social graph root")?;
698        }
699        let chunks = graph
700            .to_binary_chunks_with_budget(*options)
701            .context("encode social graph snapshot")?;
702        Ok(chunks.into_iter().map(Bytes::from).collect())
703    }
704
705    fn ingest_event(&self, event: &Event) -> Result<()> {
706        self.ingest_event_with_storage_class(event, self.default_storage_class_for(event)?)
707    }
708
709    fn ingest_events(&self, events: &[Event]) -> Result<()> {
710        if events.is_empty() {
711            return Ok(());
712        }
713
714        let mut public = Vec::new();
715        let mut ambient = Vec::new();
716        for event in events {
717            match self.default_storage_class_for(event)? {
718                EventStorageClass::Public => public.push(event.clone()),
719                EventStorageClass::Ambient => ambient.push(event.clone()),
720            }
721        }
722
723        if !public.is_empty() {
724            self.ingest_events_with_storage_class(&public, EventStorageClass::Public)?;
725        }
726        if !ambient.is_empty() {
727            self.ingest_events_with_storage_class(&ambient, EventStorageClass::Ambient)?;
728        }
729
730        Ok(())
731    }
732
733    fn apply_graph_events_only(&self, events: &[Event]) -> Result<()> {
734        let graph_events = events
735            .iter()
736            .filter(|event| is_social_graph_event(event.kind))
737            .collect::<Vec<_>>();
738        if graph_events.is_empty() {
739            return Ok(());
740        }
741
742        {
743            let mut graph = self.graph.lock().unwrap();
744            let mut snapshot = SocialGraph::from_state(
745                graph
746                    .export_state()
747                    .context("export social graph state for graph-only ingest")?,
748            )
749            .context("rebuild social graph state for graph-only ingest")?;
750            for event in graph_events {
751                snapshot.handle_event(&graph_event_from_nostr(event), true, 0.0);
752            }
753            graph
754                .replace_state(&snapshot.export_state())
755                .context("replace graph-only social graph state")?;
756        }
757        self.invalidate_distance_cache();
758        Ok(())
759    }
760
    /// Queries both the public and the ambient bucket
    /// (`EventQueryScope::All`) for events matching `filter`.
    fn query_events(&self, filter: &Filter, limit: usize) -> Result<Vec<Event>> {
        self.query_events_in_scope(filter, limit, EventQueryScope::All)
    }
764
765    fn default_storage_class_for(&self, event: &Event) -> Result<EventStorageClass> {
766        let graph = self.graph.lock().unwrap();
767        let root_hex = graph.get_root().context("read social graph root")?;
768        if root_hex != DEFAULT_ROOT_HEX && root_hex == event.pubkey.to_hex() {
769            return Ok(EventStorageClass::Public);
770        }
771        Ok(EventStorageClass::Ambient)
772    }
773
    /// Event bucket that backs the given `storage_class`.
    fn bucket(&self, storage_class: EventStorageClass) -> &EventIndexBucket {
        match storage_class {
            EventStorageClass::Public => &self.public_events,
            EventStorageClass::Ambient => &self.ambient_events,
        }
    }
780
781    fn ingest_event_with_storage_class(
782        &self,
783        event: &Event,
784        storage_class: EventStorageClass,
785    ) -> Result<()> {
786        let current_root = self.bucket(storage_class).events_root()?;
787        let next_root = self
788            .bucket(storage_class)
789            .store_event(current_root.as_ref(), event)?;
790        self.bucket(storage_class)
791            .write_events_root(Some(&next_root))?;
792
793        self.update_profile_index_for_events(std::slice::from_ref(event))?;
794
795        if is_social_graph_event(event.kind) {
796            {
797                let mut graph = self.graph.lock().unwrap();
798                graph
799                    .handle_event(&graph_event_from_nostr(event), true, 0.0)
800                    .context("ingest social graph event into nostr-social-graph")?;
801            }
802            self.invalidate_distance_cache();
803        }
804
805        Ok(())
806    }
807
808    fn ingest_events_with_storage_class(
809        &self,
810        events: &[Event],
811        storage_class: EventStorageClass,
812    ) -> Result<()> {
813        if events.is_empty() {
814            return Ok(());
815        }
816
817        let bucket = self.bucket(storage_class);
818        let current_root = bucket.events_root()?;
819        let stored_events = events
820            .iter()
821            .map(stored_event_from_nostr)
822            .collect::<Vec<_>>();
823        let next_root = block_on(
824            bucket
825                .event_store
826                .build(current_root.as_ref(), stored_events),
827        )
828        .map_err(map_event_store_error)?;
829        bucket.write_events_root(next_root.as_ref())?;
830
831        self.update_profile_index_for_events(events)?;
832
833        let graph_events = events
834            .iter()
835            .filter(|event| is_social_graph_event(event.kind))
836            .collect::<Vec<_>>();
837        if graph_events.is_empty() {
838            return Ok(());
839        }
840
841        {
842            let mut graph = self.graph.lock().unwrap();
843            let mut snapshot = SocialGraph::from_state(
844                graph
845                    .export_state()
846                    .context("export social graph state for batch ingest")?,
847            )
848            .context("rebuild social graph state for batch ingest")?;
849            for event in graph_events {
850                snapshot.handle_event(&graph_event_from_nostr(event), true, 0.0);
851            }
852            graph
853                .replace_state(&snapshot.export_state())
854                .context("replace batched social graph state")?;
855        }
856        self.invalidate_distance_cache();
857
858        Ok(())
859    }
860
861    pub(crate) fn query_events_in_scope(
862        &self,
863        filter: &Filter,
864        limit: usize,
865        scope: EventQueryScope,
866    ) -> Result<Vec<Event>> {
867        if limit == 0 {
868            return Ok(Vec::new());
869        }
870
871        let buckets: &[&EventIndexBucket] = match scope {
872            EventQueryScope::PublicOnly => &[&self.public_events],
873            EventQueryScope::AmbientOnly => &[&self.ambient_events],
874            EventQueryScope::All => &[&self.public_events, &self.ambient_events],
875        };
876
877        let mut candidates = Vec::new();
878        for bucket in buckets {
879            candidates.extend(bucket.query_events(filter, limit)?);
880        }
881
882        let mut deduped = dedupe_events(candidates);
883        deduped.retain(|event| filter.match_event(event));
884        deduped.truncate(limit);
885        Ok(deduped)
886    }
887}
888
889impl SocialGraphBackend for SocialGraphStore {
890    fn stats(&self) -> Result<SocialGraphStats> {
891        SocialGraphStore::stats(self)
892    }
893
894    fn users_by_follow_distance(&self, distance: u32) -> Result<Vec<[u8; 32]>> {
895        SocialGraphStore::users_by_follow_distance(self, distance)
896    }
897
898    fn follow_distance(&self, pk_bytes: &[u8; 32]) -> Result<Option<u32>> {
899        SocialGraphStore::follow_distance(self, pk_bytes)
900    }
901
902    fn follow_list_created_at(&self, owner: &[u8; 32]) -> Result<Option<u64>> {
903        SocialGraphStore::follow_list_created_at(self, owner)
904    }
905
906    fn followed_targets(&self, owner: &[u8; 32]) -> Result<UserSet> {
907        SocialGraphStore::followed_targets(self, owner)
908    }
909
910    fn is_overmuted_user(&self, user_pk: &[u8; 32], threshold: f64) -> Result<bool> {
911        SocialGraphStore::is_overmuted_user(self, user_pk, threshold)
912    }
913
914    fn profile_search_root(&self) -> Result<Option<Cid>> {
915        SocialGraphStore::profile_search_root(self)
916    }
917
918    fn snapshot_chunks(&self, root: &[u8; 32], options: &BinaryBudget) -> Result<Vec<Bytes>> {
919        SocialGraphStore::snapshot_chunks(self, root, options)
920    }
921
922    fn ingest_event(&self, event: &Event) -> Result<()> {
923        SocialGraphStore::ingest_event(self, event)
924    }
925
926    fn ingest_event_with_storage_class(
927        &self,
928        event: &Event,
929        storage_class: EventStorageClass,
930    ) -> Result<()> {
931        SocialGraphStore::ingest_event_with_storage_class(self, event, storage_class)
932    }
933
934    fn ingest_events(&self, events: &[Event]) -> Result<()> {
935        SocialGraphStore::ingest_events(self, events)
936    }
937
938    fn ingest_events_with_storage_class(
939        &self,
940        events: &[Event],
941        storage_class: EventStorageClass,
942    ) -> Result<()> {
943        SocialGraphStore::ingest_events_with_storage_class(self, events, storage_class)
944    }
945
946    fn ingest_graph_events(&self, events: &[Event]) -> Result<()> {
947        SocialGraphStore::apply_graph_events_only(self, events)
948    }
949
950    fn query_events(&self, filter: &Filter, limit: usize) -> Result<Vec<Event>> {
951        SocialGraphStore::query_events(self, filter, limit)
952    }
953}
954
955impl NostrSocialGraphBackend for SocialGraphStore {
956    type Error = UpstreamGraphBackendError;
957
958    fn get_root(&self) -> std::result::Result<String, Self::Error> {
959        let graph = self.graph.lock().unwrap();
960        graph
961            .get_root()
962            .context("read social graph root")
963            .map_err(|err| UpstreamGraphBackendError(err.to_string()))
964    }
965
966    fn set_root(&mut self, root: &str) -> std::result::Result<(), Self::Error> {
967        let root_bytes =
968            decode_pubkey(root).map_err(|err| UpstreamGraphBackendError(err.to_string()))?;
969        SocialGraphStore::set_root(self, &root_bytes)
970            .map_err(|err| UpstreamGraphBackendError(err.to_string()))
971    }
972
973    fn handle_event(
974        &mut self,
975        event: &GraphEvent,
976        allow_unknown_authors: bool,
977        overmute_threshold: f64,
978    ) -> std::result::Result<(), Self::Error> {
979        {
980            let mut graph = self.graph.lock().unwrap();
981            graph
982                .handle_event(event, allow_unknown_authors, overmute_threshold)
983                .context("ingest social graph event into heed backend")
984                .map_err(|err| UpstreamGraphBackendError(err.to_string()))?;
985        }
986        self.invalidate_distance_cache();
987        Ok(())
988    }
989
990    fn get_follow_distance(&self, user: &str) -> std::result::Result<u32, Self::Error> {
991        let graph = self.graph.lock().unwrap();
992        graph
993            .get_follow_distance(user)
994            .context("read social graph follow distance")
995            .map_err(|err| UpstreamGraphBackendError(err.to_string()))
996    }
997
998    fn is_following(
999        &self,
1000        follower: &str,
1001        followed_user: &str,
1002    ) -> std::result::Result<bool, Self::Error> {
1003        let graph = self.graph.lock().unwrap();
1004        graph
1005            .is_following(follower, followed_user)
1006            .context("read social graph following edge")
1007            .map_err(|err| UpstreamGraphBackendError(err.to_string()))
1008    }
1009
1010    fn get_followed_by_user(&self, user: &str) -> std::result::Result<Vec<String>, Self::Error> {
1011        let graph = self.graph.lock().unwrap();
1012        graph
1013            .get_followed_by_user(user)
1014            .context("read followed-by-user list")
1015            .map_err(|err| UpstreamGraphBackendError(err.to_string()))
1016    }
1017
1018    fn get_followers_by_user(&self, user: &str) -> std::result::Result<Vec<String>, Self::Error> {
1019        let graph = self.graph.lock().unwrap();
1020        graph
1021            .get_followers_by_user(user)
1022            .context("read followers-by-user list")
1023            .map_err(|err| UpstreamGraphBackendError(err.to_string()))
1024    }
1025
1026    fn get_muted_by_user(&self, user: &str) -> std::result::Result<Vec<String>, Self::Error> {
1027        let graph = self.graph.lock().unwrap();
1028        graph
1029            .get_muted_by_user(user)
1030            .context("read muted-by-user list")
1031            .map_err(|err| UpstreamGraphBackendError(err.to_string()))
1032    }
1033
1034    fn get_user_muted_by(&self, user: &str) -> std::result::Result<Vec<String>, Self::Error> {
1035        let graph = self.graph.lock().unwrap();
1036        graph
1037            .get_user_muted_by(user)
1038            .context("read user-muted-by list")
1039            .map_err(|err| UpstreamGraphBackendError(err.to_string()))
1040    }
1041
1042    fn get_follow_list_created_at(
1043        &self,
1044        user: &str,
1045    ) -> std::result::Result<Option<u64>, Self::Error> {
1046        let graph = self.graph.lock().unwrap();
1047        graph
1048            .get_follow_list_created_at(user)
1049            .context("read social graph follow list timestamp")
1050            .map_err(|err| UpstreamGraphBackendError(err.to_string()))
1051    }
1052
1053    fn get_mute_list_created_at(
1054        &self,
1055        user: &str,
1056    ) -> std::result::Result<Option<u64>, Self::Error> {
1057        let graph = self.graph.lock().unwrap();
1058        graph
1059            .get_mute_list_created_at(user)
1060            .context("read social graph mute list timestamp")
1061            .map_err(|err| UpstreamGraphBackendError(err.to_string()))
1062    }
1063
1064    fn is_overmuted(&self, user: &str, threshold: f64) -> std::result::Result<bool, Self::Error> {
1065        let graph = self.graph.lock().unwrap();
1066        graph
1067            .is_overmuted(user, threshold)
1068            .context("check social graph overmute")
1069            .map_err(|err| UpstreamGraphBackendError(err.to_string()))
1070    }
1071}
1072
1073impl<T> SocialGraphBackend for Arc<T>
1074where
1075    T: SocialGraphBackend + ?Sized,
1076{
1077    fn stats(&self) -> Result<SocialGraphStats> {
1078        self.as_ref().stats()
1079    }
1080
1081    fn users_by_follow_distance(&self, distance: u32) -> Result<Vec<[u8; 32]>> {
1082        self.as_ref().users_by_follow_distance(distance)
1083    }
1084
1085    fn follow_distance(&self, pk_bytes: &[u8; 32]) -> Result<Option<u32>> {
1086        self.as_ref().follow_distance(pk_bytes)
1087    }
1088
1089    fn follow_list_created_at(&self, owner: &[u8; 32]) -> Result<Option<u64>> {
1090        self.as_ref().follow_list_created_at(owner)
1091    }
1092
1093    fn followed_targets(&self, owner: &[u8; 32]) -> Result<UserSet> {
1094        self.as_ref().followed_targets(owner)
1095    }
1096
1097    fn is_overmuted_user(&self, user_pk: &[u8; 32], threshold: f64) -> Result<bool> {
1098        self.as_ref().is_overmuted_user(user_pk, threshold)
1099    }
1100
1101    fn profile_search_root(&self) -> Result<Option<Cid>> {
1102        self.as_ref().profile_search_root()
1103    }
1104
1105    fn snapshot_chunks(&self, root: &[u8; 32], options: &BinaryBudget) -> Result<Vec<Bytes>> {
1106        self.as_ref().snapshot_chunks(root, options)
1107    }
1108
1109    fn ingest_event(&self, event: &Event) -> Result<()> {
1110        self.as_ref().ingest_event(event)
1111    }
1112
1113    fn ingest_event_with_storage_class(
1114        &self,
1115        event: &Event,
1116        storage_class: EventStorageClass,
1117    ) -> Result<()> {
1118        self.as_ref()
1119            .ingest_event_with_storage_class(event, storage_class)
1120    }
1121
1122    fn ingest_events(&self, events: &[Event]) -> Result<()> {
1123        self.as_ref().ingest_events(events)
1124    }
1125
1126    fn ingest_events_with_storage_class(
1127        &self,
1128        events: &[Event],
1129        storage_class: EventStorageClass,
1130    ) -> Result<()> {
1131        self.as_ref()
1132            .ingest_events_with_storage_class(events, storage_class)
1133    }
1134
1135    fn ingest_graph_events(&self, events: &[Event]) -> Result<()> {
1136        self.as_ref().ingest_graph_events(events)
1137    }
1138
1139    fn query_events(&self, filter: &Filter, limit: usize) -> Result<Vec<Event>> {
1140        self.as_ref().query_events(filter, limit)
1141    }
1142}
1143
1144fn should_replace_placeholder_root(graph: &HeedSocialGraph) -> Result<bool> {
1145    if graph.get_root().context("read current social graph root")? != DEFAULT_ROOT_HEX {
1146        return Ok(false);
1147    }
1148
1149    let GraphStats {
1150        users,
1151        follows,
1152        mutes,
1153        ..
1154    } = graph.size().context("size social graph")?;
1155    Ok(users <= 1 && follows == 0 && mutes == 0)
1156}
1157
1158fn decode_pubkey_set(values: Vec<String>) -> Result<UserSet> {
1159    let mut set = UserSet::new();
1160    for value in values {
1161        set.insert(decode_pubkey(&value)?);
1162    }
1163    Ok(set)
1164}
1165
1166fn decode_pubkey(value: &str) -> Result<[u8; 32]> {
1167    let mut bytes = [0u8; 32];
1168    hex::decode_to_slice(value, &mut bytes)
1169        .with_context(|| format!("decode social graph pubkey {value}"))?;
1170    Ok(bytes)
1171}
1172
1173fn is_social_graph_event(kind: Kind) -> bool {
1174    kind == Kind::ContactList || kind == Kind::MuteList
1175}
1176
1177fn graph_event_from_nostr(event: &Event) -> GraphEvent {
1178    GraphEvent {
1179        created_at: event.created_at.as_u64(),
1180        content: event.content.clone(),
1181        tags: event
1182            .tags
1183            .iter()
1184            .map(|tag| tag.as_slice().to_vec())
1185            .collect(),
1186        kind: event.kind.as_u16() as u32,
1187        pubkey: event.pubkey.to_hex(),
1188        id: event.id.to_hex(),
1189        sig: event.sig.to_string(),
1190    }
1191}
1192
1193fn stored_event_from_nostr(event: &Event) -> StoredNostrEvent {
1194    StoredNostrEvent {
1195        id: event.id.to_hex(),
1196        pubkey: event.pubkey.to_hex(),
1197        created_at: event.created_at.as_u64(),
1198        kind: event.kind.as_u16() as u32,
1199        tags: event
1200            .tags
1201            .iter()
1202            .map(|tag| tag.as_slice().to_vec())
1203            .collect(),
1204        content: event.content.clone(),
1205        sig: event.sig.to_string(),
1206    }
1207}
1208
1209fn nostr_event_from_stored(event: StoredNostrEvent) -> Result<Event> {
1210    let value = serde_json::json!({
1211        "id": event.id,
1212        "pubkey": event.pubkey,
1213        "created_at": event.created_at,
1214        "kind": event.kind,
1215        "tags": event.tags,
1216        "content": event.content,
1217        "sig": event.sig,
1218    });
1219    Event::from_json(value.to_string()).context("decode stored nostr event")
1220}
1221
/// Crate-visible entry point for converting a `StoredNostrEvent` into a nostr `Event`.
///
/// Delegates directly to `nostr_event_from_stored` and returns its result unchanged.
pub(crate) fn stored_event_to_nostr_event(event: StoredNostrEvent) -> Result<Event> {
    nostr_event_from_stored(event)
}
1225
1226fn encode_cid(cid: &Cid) -> Result<Vec<u8>> {
1227    rmp_serde::to_vec_named(&StoredCid {
1228        hash: cid.hash,
1229        key: cid.key,
1230    })
1231    .context("encode social graph events root")
1232}
1233
1234fn decode_cid(bytes: &[u8]) -> Result<Option<Cid>> {
1235    let stored: StoredCid =
1236        rmp_serde::from_slice(bytes).context("decode social graph events root")?;
1237    Ok(Some(Cid {
1238        hash: stored.hash,
1239        key: stored.key,
1240    }))
1241}
1242
1243fn read_root_file(path: &Path) -> Result<Option<Cid>> {
1244    let Ok(bytes) = std::fs::read(path) else {
1245        return Ok(None);
1246    };
1247    decode_cid(&bytes)
1248}
1249
1250fn write_root_file(path: &Path, root: Option<&Cid>) -> Result<()> {
1251    let Some(root) = root else {
1252        if path.exists() {
1253            std::fs::remove_file(path)?;
1254        }
1255        return Ok(());
1256    };
1257
1258    let encoded = encode_cid(root)?;
1259    let tmp_path = path.with_extension("tmp");
1260    std::fs::write(&tmp_path, encoded)?;
1261    std::fs::rename(tmp_path, path)?;
1262    Ok(())
1263}
1264
1265fn normalize_profile_name(value: &serde_json::Value) -> Option<String> {
1266    let raw = value.as_str()?;
1267    let trimmed = raw.split_whitespace().collect::<Vec<_>>().join(" ");
1268    if trimmed.is_empty() {
1269        return None;
1270    }
1271    Some(trimmed.chars().take(PROFILE_NAME_MAX_LENGTH).collect())
1272}
1273
1274fn extract_profile_names(profile: &serde_json::Map<String, serde_json::Value>) -> Vec<String> {
1275    let mut names = Vec::new();
1276    let mut seen = HashSet::new();
1277
1278    for key in ["display_name", "displayName", "name", "username"] {
1279        let Some(value) = profile.get(key).and_then(normalize_profile_name) else {
1280            continue;
1281        };
1282        let lowered = value.to_lowercase();
1283        if seen.insert(lowered) {
1284            names.push(value);
1285        }
1286    }
1287
1288    names
1289}
1290
/// Decides whether a NIP-05 local part should be left out of a profile's search terms.
///
/// Returns `true` when `local_part`:
/// - is exactly one character long (counted in characters, not bytes, so a single non-ASCII
///   character is also rejected),
/// - starts with `npub1`, or
/// - is a substring of `primary_name` after lowercasing the name and removing all whitespace.
///
/// `local_part` is compared as given; this function does not lowercase it.
fn should_reject_profile_nip05(local_part: &str, primary_name: &str) -> bool {
    let is_single_char = local_part.chars().count() == 1;
    if is_single_char || local_part.starts_with("npub1") {
        return true;
    }

    primary_name
        .to_lowercase()
        .split_whitespace()
        .collect::<String>()
        .contains(local_part)
}
1302
1303fn normalize_profile_nip05(
1304    profile: &serde_json::Map<String, serde_json::Value>,
1305    primary_name: Option<&str>,
1306) -> Option<String> {
1307    let raw = profile.get("nip05")?.as_str()?;
1308    let local_part = raw.split('@').next()?.trim().to_lowercase();
1309    if local_part.is_empty() {
1310        return None;
1311    }
1312    let truncated: String = local_part.chars().take(PROFILE_NAME_MAX_LENGTH).collect();
1313    if truncated.is_empty() {
1314        return None;
1315    }
1316    if primary_name.is_some_and(|name| should_reject_profile_nip05(&truncated, name)) {
1317        return None;
1318    }
1319    Some(truncated)
1320}
1321
/// Returns `true` when `word` is one of the stop words excluded from search keywords.
///
/// The comparison is exact and case-sensitive; every entry in the table is lowercase.
fn is_search_stop_word(word: &str) -> bool {
    const STOP_WORDS: &[&str] = &[
        "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by",
        "from", "is", "it", "as", "be", "was", "are", "this", "that", "these", "those", "i",
        "you", "he", "she", "we", "they", "my", "your", "his", "her", "its", "our", "their",
        "what", "which", "who", "whom", "how", "when", "where", "why", "will", "would", "could",
        "should", "can", "may", "might", "must", "have", "has", "had", "do", "does", "did",
        "been", "being", "get", "got", "just", "now", "then", "so", "if", "not", "no", "yes",
        "all", "any", "some", "more", "most", "other", "into", "over", "after", "before",
        "about", "up", "down", "out", "off", "through", "during", "under", "again", "further",
        "once",
    ];

    STOP_WORDS.contains(&word)
}
1419
/// Returns `true` when `word` consists only of ASCII digits and is not a four-digit number in
/// the range 1900..=2099.
///
/// An empty string contains no non-digit and therefore also returns `true`.
fn is_pure_search_number(word: &str) -> bool {
    // Bytes of non-ASCII characters are never ASCII digits, so a byte scan is sufficient.
    let digits_only = word.bytes().all(|byte| byte.is_ascii_digit());
    if !digits_only {
        return false;
    }

    let looks_like_year = word.len() == 4 && matches!(word.parse::<u16>(), Ok(1900..=2099));
    !looks_like_year
}
1429
/// Splits a compound word into its parts.
///
/// A new part starts between two adjacent characters when:
/// - a lowercase character is followed by an uppercase one (`camelCase`),
/// - an ASCII digit is followed by an alphabetic character, or the reverse (`abc123`),
/// - an uppercase character is followed by an uppercase one that is itself followed by a
///   lowercase one (`HTTPServer` -> `HTTP`, `Server`).
///
/// Returns an empty vector for an empty `word`.
fn split_compound_search_word(word: &str) -> Vec<String> {
    let letters: Vec<char> = word.chars().collect();
    let mut segments: Vec<String> = Vec::new();
    let mut segment = String::new();
    let mut previous: Option<char> = None;

    for (position, &letter) in letters.iter().enumerate() {
        let starts_new_segment = match previous {
            None => false,
            Some(before) => {
                let lower_to_upper = before.is_lowercase() && letter.is_uppercase();
                let digit_to_alpha = before.is_ascii_digit() && letter.is_alphabetic();
                let alpha_to_digit = before.is_alphabetic() && letter.is_ascii_digit();
                // End of an acronym: the last capital belongs to the following word.
                let acronym_end = before.is_uppercase()
                    && letter.is_uppercase()
                    && letters
                        .get(position + 1)
                        .is_some_and(|next| next.is_lowercase());
                lower_to_upper || digit_to_alpha || alpha_to_digit || acronym_end
            }
        };

        if starts_new_segment {
            segments.push(std::mem::take(&mut segment));
        }

        segment.push(letter);
        previous = Some(letter);
    }

    if !segment.is_empty() {
        segments.push(segment);
    }

    segments
}
1458
1459fn parse_search_keywords(text: &str) -> Vec<String> {
1460    let mut keywords = Vec::new();
1461    let mut seen = HashSet::new();
1462
1463    for word in text
1464        .split(|ch: char| !ch.is_alphanumeric())
1465        .filter(|word| !word.is_empty())
1466    {
1467        let mut variants = Vec::with_capacity(1 + word.len() / 4);
1468        variants.push(word.to_lowercase());
1469        variants.extend(
1470            split_compound_search_word(word)
1471                .into_iter()
1472                .map(|part| part.to_lowercase()),
1473        );
1474
1475        for lowered in variants {
1476            if lowered.chars().count() < 2
1477                || is_search_stop_word(&lowered)
1478                || is_pure_search_number(&lowered)
1479            {
1480                continue;
1481            }
1482            if seen.insert(lowered.clone()) {
1483                keywords.push(lowered);
1484            }
1485        }
1486    }
1487
1488    keywords
1489}
1490
1491fn profile_search_terms_for_event(event: &Event) -> Vec<String> {
1492    let profile = match serde_json::from_str::<serde_json::Value>(&event.content) {
1493        Ok(serde_json::Value::Object(profile)) => profile,
1494        _ => serde_json::Map::new(),
1495    };
1496    let names = extract_profile_names(&profile);
1497    let primary_name = names.first().map(String::as_str);
1498    let mut parts = Vec::new();
1499    if let Some(name) = primary_name {
1500        parts.push(name.to_string());
1501    }
1502    if let Some(nip05) = normalize_profile_nip05(&profile, primary_name) {
1503        parts.push(nip05);
1504    }
1505    parts.push(event.pubkey.to_hex());
1506    if names.len() > 1 {
1507        parts.extend(names.into_iter().skip(1));
1508    }
1509    parse_search_keywords(&parts.join(" "))
1510}
1511
1512fn compare_nostr_events(left: &Event, right: &Event) -> std::cmp::Ordering {
1513    left.created_at
1514        .as_u64()
1515        .cmp(&right.created_at.as_u64())
1516        .then_with(|| left.id.to_hex().cmp(&right.id.to_hex()))
1517}
1518
1519fn map_event_store_error(err: NostrEventStoreError) -> anyhow::Error {
1520    anyhow::anyhow!("nostr event store error: {err}")
1521}
1522
1523fn ensure_social_graph_mapsize(db_dir: &Path, requested_bytes: u64) -> Result<()> {
1524    let requested = requested_bytes.max(DEFAULT_SOCIALGRAPH_MAP_SIZE_BYTES);
1525    let page_size = page_size_bytes() as u64;
1526    let rounded = requested
1527        .checked_add(page_size.saturating_sub(1))
1528        .map(|size| size / page_size * page_size)
1529        .unwrap_or(requested);
1530    let map_size = usize::try_from(rounded).context("social graph mapsize exceeds usize")?;
1531
1532    let env = unsafe {
1533        heed::EnvOpenOptions::new()
1534            .map_size(DEFAULT_SOCIALGRAPH_MAP_SIZE_BYTES as usize)
1535            .max_dbs(SOCIALGRAPH_MAX_DBS)
1536            .open(db_dir)
1537    }
1538    .context("open social graph LMDB env for resize")?;
1539    if env.info().map_size < map_size {
1540        unsafe { env.resize(map_size) }.context("resize social graph LMDB env")?;
1541    }
1542
1543    Ok(())
1544}
1545
/// Returns the value reported by `page_size::get_granularity()`.
///
/// `ensure_social_graph_mapsize` uses this as the rounding unit for the LMDB map size.
fn page_size_bytes() -> usize {
    page_size::get_granularity()
}
1549
1550#[cfg(test)]
1551mod tests;