Skip to main content

feed/
cache.rs

1use std::collections::HashSet;
2use std::fs;
3use std::path::{Path, PathBuf};
4
5use anyhow::{Context, Result};
6use chrono::{DateTime, Utc};
7use etcetera::BaseStrategy;
8use serde::{Deserialize, Serialize};
9use sha2::{Digest, Sha256};
10
11use crate::feed_source::RawFeed;
12
/// Handle to the on-disk feed cache.
///
/// Holds only the path of the data directory; constructing or cloning a
/// store performs no I/O.
#[derive(Clone, Debug)]
pub struct CacheStore {
    /// Directory that contains one `<hash>.json` cache file per feed URL.
    data_dir: PathBuf,
}
17
18impl CacheStore {
19    pub fn new(data_dir: impl Into<PathBuf>) -> Self {
20        Self {
21            data_dir: data_dir.into(),
22        }
23    }
24
25    pub fn data_dir(&self) -> &Path {
26        &self.data_dir
27    }
28
29    fn feed_path(&self, url: &str) -> PathBuf {
30        let mut hasher = Sha256::new();
31        hasher.update(url.as_bytes());
32        let hash = format!("{:x}", hasher.finalize());
33        self.data_dir.join(format!("{}.json", &hash[..16]))
34    }
35
36    pub fn load_http_metadata(&self, url: &str) -> HttpMetadata {
37        let path = self.feed_path(url);
38        let content = match fs::read_to_string(&path) {
39            Ok(c) => c,
40            Err(_) => {
41                return HttpMetadata {
42                    etag: None,
43                    last_modified: None,
44                }
45            }
46        };
47        match serde_json::from_str::<CachedFeed>(&content) {
48            Ok(cached) => HttpMetadata {
49                etag: cached.etag,
50                last_modified: cached.last_modified,
51            },
52            Err(_) => HttpMetadata {
53                etag: None,
54                last_modified: None,
55            },
56        }
57    }
58
59    /// Load cached articles for a feed URL
60    pub fn load_feed(&self, url: &str) -> Option<CachedFeed> {
61        let path = self.feed_path(url);
62        let content = fs::read_to_string(&path).ok()?;
63        serde_json::from_str(&content).ok()
64    }
65
66    /// Save fetched results to cache, merging with existing entries (deduplicated by URL)
67    pub fn save_feed(
68        &self,
69        url: &str,
70        feed: &RawFeed,
71        etag: Option<&str>,
72        last_modified: Option<&str>,
73    ) -> Result<()> {
74        fs::create_dir_all(&self.data_dir).with_context(|| {
75            format!(
76                "Failed to create data directory: {}",
77                self.data_dir.display()
78            )
79        })?;
80
81        let path = self.feed_path(url);
82
83        // Load existing cache
84        let mut articles: Vec<CachedArticle> = if let Ok(content) = fs::read_to_string(&path) {
85            if let Ok(existing) = serde_json::from_str::<CachedFeed>(&content) {
86                existing.articles
87            } else {
88                Vec::new()
89            }
90        } else {
91            Vec::new()
92        };
93
94        // Build set of URLs present in the current fetch
95        let fetched_urls: HashSet<&str> = feed.entries.iter().map(|e| e.url.as_str()).collect();
96
97        let now = Utc::now();
98
99        // Update last_seen for existing entries that are still in the feed
100        for article in &mut articles {
101            if fetched_urls.contains(article.url.as_str()) {
102                article.last_seen = now;
103            }
104        }
105
106        // Merge new entries (deduplicate by URL, preserve read status)
107        let existing_urls: HashSet<String> = articles.iter().map(|e| e.url.clone()).collect();
108        for entry in &feed.entries {
109            if !entry.url.is_empty() && !existing_urls.contains(&entry.url) {
110                articles.push(CachedArticle {
111                    title: entry.title.clone(),
112                    url: entry.url.clone(),
113                    published: entry.published,
114                    read: false,
115                    rss_content: entry.rss_content.clone(),
116                    last_seen: now,
117                });
118            }
119        }
120
121        // Sort by datetime (newest first)
122        articles.sort_by(|a, b| b.published.cmp(&a.published));
123
124        let cached = CachedFeed {
125            feed_url: url.to_string(),
126            feed_title: feed.title.clone(),
127            last_fetched: Utc::now(),
128            etag: etag.map(String::from),
129            last_modified: last_modified.map(String::from),
130            articles,
131        };
132
133        let json = serde_json::to_string(&cached).context("Failed to serialize cache")?;
134        fs::write(&path, json)
135            .with_context(|| format!("Failed to write cache: {}", path.display()))?;
136
137        Ok(())
138    }
139
140    /// Remove entries older than retention_days from all cache files
141    pub fn purge_old_entries(&self, retention_days: i32) -> Result<()> {
142        if retention_days <= 0 {
143            return Ok(()); // 0=forever, negative=cache disabled
144        }
145
146        let cutoff = Utc::now() - chrono::Duration::days(retention_days as i64);
147
148        if !self.data_dir.exists() {
149            return Ok(());
150        }
151
152        for entry in fs::read_dir(&self.data_dir)? {
153            let entry = entry?;
154            let path = entry.path();
155            if path.extension().and_then(|e| e.to_str()) != Some("json") {
156                continue;
157            }
158
159            let content = fs::read_to_string(&path)?;
160            if let Ok(mut cached) = serde_json::from_str::<CachedFeed>(&content) {
161                let before = cached.articles.len();
162                cached.articles.retain(|e| e.last_seen > cutoff);
163                if cached.articles.len() != before {
164                    if cached.articles.is_empty() {
165                        fs::remove_file(&path)?;
166                    } else {
167                        let json = serde_json::to_string(&cached)?;
168                        fs::write(&path, json)?;
169                    }
170                }
171            }
172        }
173
174        Ok(())
175    }
176
177    /// Set the read status of an article in a specific feed's cache file (O(1) file lookup).
178    pub fn set_read_status(&self, feed_url: &str, article_url: &str, read: bool) -> Result<()> {
179        if !self.data_dir.exists() {
180            return Ok(());
181        }
182
183        let path = self.feed_path(feed_url);
184        if !path.exists() {
185            return Ok(());
186        }
187
188        let content = fs::read_to_string(&path)?;
189        if let Ok(mut cached) = serde_json::from_str::<CachedFeed>(&content) {
190            let mut changed = false;
191            for e in &mut cached.articles {
192                if e.url == article_url && e.read != read {
193                    e.read = read;
194                    changed = true;
195                }
196            }
197            if changed {
198                let json = serde_json::to_string(&cached)?;
199                fs::write(&path, json)?;
200            }
201        }
202
203        Ok(())
204    }
205}
206
/// One article as stored inside a feed's cache file.
#[derive(Debug, Serialize, Deserialize)]
pub struct CachedArticle {
    /// Article title, copied from the fetched entry when first cached.
    pub title: String,
    /// Article URL; used as the deduplication key within a feed.
    pub url: String,
    /// Publication time, if the entry provided one.
    pub published: Option<DateTime<Utc>>,
    /// Read flag; treated as `false` when absent from the stored JSON.
    #[serde(default)]
    pub read: bool,
    /// Optional content carried over from the fetched entry; omitted from
    /// the stored JSON when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rss_content: Option<String>,
    /// Timestamp when this URL was last seen in a feed fetch.
    /// Falls back to the current time when absent from the stored JSON.
    #[serde(default = "Utc::now")]
    pub last_seen: DateTime<Utc>,
}
220
/// Contents of one cache file: feed-level metadata plus its articles.
#[derive(Debug, Serialize, Deserialize)]
pub struct CachedFeed {
    /// URL the feed was saved under.
    pub feed_url: String,
    /// Feed title as of the last save.
    pub feed_title: String,
    /// Time of the last save.
    pub last_fetched: DateTime<Utc>,
    /// Stored `etag` value, if any; omitted from the JSON when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub etag: Option<String>,
    /// Stored `last_modified` value, if any; omitted from the JSON when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_modified: Option<String>,
    /// Cached articles; `save_feed` stores them sorted newest first.
    pub articles: Vec<CachedArticle>,
}
232
/// The `etag` / `last_modified` pair stored alongside a cached feed.
///
/// NOTE(review): presumably these mirror the HTTP `ETag` and `Last-Modified`
/// response headers and are replayed on conditional requests — confirm
/// against the fetch code.
///
/// `Default` yields a value with both fields `None`, i.e. "nothing cached".
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct HttpMetadata {
    /// Stored `etag` value, if any.
    pub etag: Option<String>,
    /// Stored `last_modified` value, if any.
    pub last_modified: Option<String>,
}
237
238/// Determine data directory.
239/// Priority: config cache.path > XDG_DATA_HOME/feed/
240pub fn data_dir(config_path: Option<&str>) -> Result<PathBuf> {
241    if let Some(p) = config_path {
242        Ok(PathBuf::from(p))
243    } else {
244        let strategy =
245            etcetera::choose_base_strategy().context("Could not determine home directory")?;
246        Ok(strategy.data_dir().join("feed"))
247    }
248}