use bytes::Bytes;
use std::collections::HashSet;
use crate::tools::buckets::{BucketLocation, BucketType};
use crate::tools::time::{DurationMillis, MILLIS_IN_MINUTE, TimeMillis};
use crate::tools::types::Id;
/// Maximum age of a pen entry: `MILLIS_IN_MINUTE.const_mul(10)`, i.e. ten minutes.
/// `get_matching_posts` subtracts this from the caller's current time to get the cutoff below
/// which entries are discarded.
const RECENT_POSTS_PEN_TTL: DurationMillis = MILLIS_IN_MINUTE.const_mul(10);
/// One post filed under one bucket location inside a `RecentPostsPen`.
///
/// `RecentPostsPen::add_all` creates one of these per `(BucketLocation, Id)` pair it is given.
pub struct RecentPostsPenEntry {
/// Bucket this entry is filed under; lookups compare its `bucket_type` and `base_id`.
pub bucket_location: BucketLocation,
/// Id of the post; lookups skip entries whose id is in the caller's already-seen set.
pub post_id: Id,
/// Encoded post payload handed back to the caller on a match.
pub encoded_post_bytes: Bytes,
/// Timestamp supplied to `add_all`; compared against the TTL cutoff when pruning.
pub time_millis: TimeMillis,
}
/// Short-lived, in-memory holding area for recently added posts.
///
/// Posts are appended with `add_all` and looked up per bucket with `get_matching_posts`, which
/// also drops entries older than `RECENT_POSTS_PEN_TTL`.
///
/// `Default` is derived so the type works with `..Default::default()`, `Option::unwrap_or_default`
/// and similar APIs; the derived value is an empty pen, the same as `RecentPostsPen::new()`.
#[derive(Default)]
pub struct RecentPostsPen {
    /// Stored entries in insertion order. Expired entries stay here until the next
    /// `get_matching_posts` call prunes them.
    entries: Vec<RecentPostsPenEntry>,
}
impl RecentPostsPen {
    /// Creates a pen with no entries.
    pub fn new() -> Self {
        Self { entries: Vec::new() }
    }

    /// Files a single encoded post under every supplied bucket location.
    ///
    /// One entry is appended per `(bucket_location, post_id)` pair, in slice order. Each entry
    /// receives its own clone of `encoded_post_bytes` and is stamped with `time_millis`.
    /// Nothing is pruned or de-duplicated here.
    pub fn add_all(
        &mut self,
        bucket_locations_and_post_ids: &[(BucketLocation, Id)],
        encoded_post_bytes: Bytes,
        time_millis: TimeMillis,
    ) {
        let new_entries = bucket_locations_and_post_ids
            .iter()
            .map(|(bucket_location, post_id)| RecentPostsPenEntry {
                bucket_location: bucket_location.clone(),
                post_id: *post_id,
                encoded_post_bytes: encoded_post_bytes.clone(),
                time_millis,
            });
        self.entries.extend(new_entries);
    }

    /// Returns the stored posts for one bucket that the caller has not seen yet.
    ///
    /// Entries whose timestamp is older than `current_time - RECENT_POSTS_PEN_TTL` are first
    /// removed from the pen for good; an entry exactly at the cutoff is kept. Of the survivors,
    /// those whose bucket location has the requested `bucket_type` and `base_id`, and whose post
    /// id is not in `already_seen_ids`, are returned in insertion order as
    /// `(bucket location, encoded bytes, post id)` tuples. Matching entries remain in the pen.
    ///
    /// NOTE(review): what `current_time - RECENT_POSTS_PEN_TTL` does when `current_time` is
    /// smaller than the TTL depends on `TimeMillis`'s subtraction impl — confirm.
    pub fn get_matching_posts(
        &mut self,
        bucket_type: BucketType,
        base_id: &Id,
        already_seen_ids: &HashSet<Id>,
        current_time: TimeMillis,
    ) -> Vec<(BucketLocation, Bytes, Id)> {
        // Pruning happens on every lookup, before any matching, so expired entries never match.
        let oldest_allowed = current_time - RECENT_POSTS_PEN_TTL;
        self.entries.retain(|stored| stored.time_millis >= oldest_allowed);

        self.entries
            .iter()
            .filter(|stored| {
                stored.bucket_location.bucket_type == bucket_type
                    && stored.bucket_location.base_id == *base_id
            })
            .filter(|stored| !already_seen_ids.contains(&stored.post_id))
            .map(|stored| {
                (
                    stored.bucket_location.clone(),
                    stored.encoded_post_bytes.clone(),
                    stored.post_id,
                )
            })
            .collect()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tools::time::MILLIS_IN_MINUTE;

    /// Builds one `(BucketLocation, Id)` pair in the shape `RecentPostsPen::add_all` expects.
    fn make_entry(bucket_type: BucketType, base_id: Id, post_id: Id, time: TimeMillis) -> (BucketLocation, Id) {
        let bucket_location = BucketLocation::new(bucket_type, base_id, MILLIS_IN_MINUTE, time).unwrap();
        (bucket_location, post_id)
    }

    /// A lookup only returns entries whose bucket type AND base id both match.
    #[test]
    fn test_matching_by_bucket_type_and_base_id() {
        let mut pen = RecentPostsPen::new();
        let base_id = Id::random();
        let other_base_id = Id::random();
        let post_id = Id::random();
        let time = TimeMillis::from_epoch_offset_str("1M").unwrap();
        let entries = vec![
            make_entry(BucketType::User, base_id, post_id, time),
            make_entry(BucketType::Hashtag, other_base_id, post_id, time),
        ];
        pen.add_all(&entries, Bytes::from_static(b"test post"), time);
        let already_seen = HashSet::new();

        // Exact (type, base id) combinations that were stored.
        let result = pen.get_matching_posts(BucketType::User, &base_id, &already_seen, time);
        assert_eq!(result.len(), 1);
        let result = pen.get_matching_posts(BucketType::Hashtag, &other_base_id, &already_seen, time);
        assert_eq!(result.len(), 1);

        // Crossed combinations: one half matches, the other does not.
        let result = pen.get_matching_posts(BucketType::User, &other_base_id, &already_seen, time);
        assert_eq!(result.len(), 0);
        let result = pen.get_matching_posts(BucketType::Hashtag, &base_id, &already_seen, time);
        assert_eq!(result.len(), 0);
    }

    /// Entries are served up to and including the TTL boundary and dropped after it.
    #[test]
    fn test_ttl_expiration() {
        let mut pen = RecentPostsPen::new();
        let base_id = Id::random();
        let post_id = Id::random();
        let time = TimeMillis::from_epoch_offset_str("1M").unwrap();
        pen.add_all(&[make_entry(BucketType::User, base_id, post_id, time)], Bytes::from_static(b"post"), time);
        let already_seen = HashSet::new();

        let within_ttl = time + MILLIS_IN_MINUTE.const_mul(9);
        let result = pen.get_matching_posts(BucketType::User, &base_id, &already_seen, within_ttl);
        assert_eq!(result.len(), 1);

        // Boundary: exactly one TTL later the cutoff equals the entry's own timestamp, and the
        // retention check is inclusive (`>=`), so the entry must still be served.
        let at_ttl = time + MILLIS_IN_MINUTE.const_mul(10);
        let result = pen.get_matching_posts(BucketType::User, &base_id, &already_seen, at_ttl);
        assert_eq!(result.len(), 1);

        let past_ttl = time + MILLIS_IN_MINUTE.const_mul(11);
        let result = pen.get_matching_posts(BucketType::User, &base_id, &already_seen, past_ttl);
        assert_eq!(result.len(), 0);
    }

    /// The pen does not de-duplicate: the same pair added three times yields three results.
    #[test]
    fn test_multiple_tokens_same_post_returns_all() {
        let mut pen = RecentPostsPen::new();
        let base_id = Id::random();
        let post_id = Id::random();
        let time = TimeMillis::from_epoch_offset_str("1M").unwrap();
        let entries = vec![
            make_entry(BucketType::User, base_id, post_id, time),
            make_entry(BucketType::User, base_id, post_id, time),
            make_entry(BucketType::User, base_id, post_id, time),
        ];
        pen.add_all(&entries, Bytes::from_static(b"post"), time);
        let already_seen = HashSet::new();
        let result = pen.get_matching_posts(BucketType::User, &base_id, &already_seen, time);
        assert_eq!(result.len(), 3);
    }

    /// Posts whose id is in the already-seen set are left out of the result.
    #[test]
    fn test_already_seen_filtering() {
        let mut pen = RecentPostsPen::new();
        let base_id = Id::random();
        let post_id = Id::random();
        let time = TimeMillis::from_epoch_offset_str("1M").unwrap();
        pen.add_all(&[make_entry(BucketType::User, base_id, post_id, time)], Bytes::from_static(b"post"), time);
        let mut already_seen = HashSet::new();
        already_seen.insert(post_id);
        let result = pen.get_matching_posts(BucketType::User, &base_id, &already_seen, time);
        assert_eq!(result.len(), 0);
    }

    /// One post filed under three different buckets is found once through each of them.
    #[test]
    fn test_single_post_multiple_timelines() {
        let mut pen = RecentPostsPen::new();
        let user_id = Id::random();
        let hashtag_id = Id::random();
        let mention_id = Id::random();
        let post_id = Id::random();
        let time = TimeMillis::from_epoch_offset_str("1M").unwrap();
        let entries = vec![
            make_entry(BucketType::User, user_id, post_id, time),
            make_entry(BucketType::Hashtag, hashtag_id, post_id, time),
            make_entry(BucketType::Mention, mention_id, post_id, time),
        ];
        pen.add_all(&entries, Bytes::from_static(b"post"), time);
        let already_seen = HashSet::new();
        assert_eq!(pen.get_matching_posts(BucketType::User, &user_id, &already_seen, time).len(), 1);
        assert_eq!(pen.get_matching_posts(BucketType::Hashtag, &hashtag_id, &already_seen, time).len(), 1);
        assert_eq!(pen.get_matching_posts(BucketType::Mention, &mention_id, &already_seen, time).len(), 1);
    }
}