progscrape_application/persist/
memindex.rs1use std::collections::HashMap;
2
3use itertools::Itertools;
4
5use progscrape_scrapers::{ScrapeCollection, StoryUrlNorm};
6
7use super::{shard::Shard, *};
8
9#[derive(Default, Serialize, Deserialize)]
11pub struct MemIndex {
12 stories: HashMap<Shard, HashMap<StoryUrlNorm, ScrapeCollection>>,
14}
15
16impl MemIndex {
17 pub fn get_all_stories(self) -> impl DoubleEndedIterator<Item = ScrapeCollection> {
18 let mut out = vec![];
19 for (_shard, stories) in self.stories.into_iter().sorted_by_cached_key(|f| f.0) {
20 for (_, story) in stories {
21 out.push(story);
22 }
23 }
24 out.into_iter()
25 }
26
27 fn map_mut(&mut self, shard: Shard) -> &mut HashMap<StoryUrlNorm, ScrapeCollection> {
28 self.stories.entry(shard).or_default()
29 }
30
31 fn map(&self, shard: &Shard) -> Option<&HashMap<StoryUrlNorm, ScrapeCollection>> {
32 self.stories.get(shard)
33 }
34
35 pub fn insert_scrapes<I: IntoIterator<Item = TypedScrape>>(
36 &mut self,
37 scrapes: I,
38 ) -> Result<(), PersistError> {
39 'outer: for scrape in scrapes {
40 let date = Shard::from_date_time(scrape.date);
41 let normalized_url = scrape.url.normalization();
42 for n in -2..=2 {
44 let map0 = self.map_mut(date.plus_months(n));
45 if let Some((key, mut scrapes)) = map0.remove_entry(normalized_url) {
46 scrapes.merge(scrape);
48 self.map_mut(Shard::from_date_time(scrapes.earliest))
49 .insert(key, scrapes);
50 continue 'outer;
51 }
52 }
53
54 if let Some(_old) = self.map_mut(date).insert(
56 normalized_url.clone(),
57 ScrapeCollection::new_from_one(scrape),
58 ) {
59 println!("Unexpected");
61 }
62 }
63 Ok(())
64 }
65}
66
#[cfg(test)]
mod test {
    use super::*;
    use progscrape_scrapers::ScrapeConfig;

    /// Checks shard month arithmetic across year boundaries and the string round-trip.
    #[test]
    fn test_year_month() {
        let december_2000 = Shard::from_year_month(2000, 12);

        // Moving forwards rolls over into the next year.
        assert_eq!(Shard::from_year_month(2001, 1), december_2000.plus_months(1));
        assert_eq!(Shard::from_year_month(2001, 12), december_2000.plus_months(12));

        // Moving backwards, both across and within the year.
        assert_eq!(Shard::from_year_month(1999, 12), december_2000.sub_months(12));
        assert_eq!(Shard::from_year_month(2000, 1), december_2000.sub_months(11));

        // A shard survives being rendered to a string and parsed back.
        let round_tripped =
            Shard::from_string(&december_2000.to_string()).expect("Failed to parse");
        assert_eq!(december_2000, round_tripped);
    }

    /// Inserts the bundled sample scrapes into a fresh index and expects no error.
    #[test]
    fn test_index_lots() {
        let sample_scrapes = progscrape_scrapers::load_sample_scrapes(&ScrapeConfig::default());
        let mut mem_index = MemIndex::default();

        let _eval = StoryEvaluator::new_for_test();
        let outcome = mem_index.insert_scrapes(sample_scrapes);
        outcome.expect("Failed to insert scrapes");
    }
}