1use crate::storage::{Db, StorageError};
8
9use super::StringErr;
10
11#[derive(Debug, Clone, PartialEq, Eq)]
14pub struct RobotsEntry {
15 pub host: String,
16 pub body: Option<String>, pub fetched_at: i64,
18 pub expires_at: i64,
19 pub state: RobotsState,
20}
21
/// Classification stored alongside a cached robots.txt record.
///
/// NOTE(review): the meaning of each variant is inferred from its name; the
/// code that decides which one to store is not part of this module — confirm
/// against the fetcher.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RobotsState {
    /// Presumably: a robots.txt body was retrieved and should be interpreted.
    Parsed,
    /// Presumably: treat the host as permitting every path.
    AllowAll,
    /// Presumably: treat the host as forbidding every path.
    DisallowAll,
}
28
29impl RobotsState {
30 pub fn as_str(self) -> &'static str {
31 match self {
32 Self::Parsed => "parsed",
33 Self::AllowAll => "allow_all",
34 Self::DisallowAll => "disallow_all",
35 }
36 }
37
38 pub fn from_db(s: &str) -> Result<Self, StorageError> {
39 Ok(match s {
40 "parsed" => Self::Parsed,
41 "allow_all" => Self::AllowAll,
42 "disallow_all" => Self::DisallowAll,
43 other => {
44 return Err(StorageError::Backend(tokio_rusqlite::Error::Error(
51 rusqlite::Error::FromSqlConversionFailure(
52 4,
53 rusqlite::types::Type::Text,
54 Box::new(StringErr(format!("unknown robots_cache.state = {other}"))),
55 ),
56 )));
57 }
58 })
59 }
60}
61
62pub async fn lookup(db: &Db, host: &str) -> Result<Option<RobotsEntry>, StorageError> {
63 let host = host.to_string();
64 let row = db
65 .conn
66 .call(move |c| {
67 let mut stmt = c.prepare(
68 "SELECT host, body, fetched_at, expires_at, state \
69 FROM robots_cache WHERE host = ?1",
70 )?;
71 let mut rows = stmt.query([&host])?;
72 if let Some(r) = rows.next()? {
73 let host: String = r.get(0)?;
74 let body: Option<String> = r.get(1)?;
75 let fetched_at: i64 = r.get(2)?;
76 let expires_at: i64 = r.get(3)?;
77 let state_s: String = r.get(4)?;
78 Ok::<_, rusqlite::Error>(Some((host, body, fetched_at, expires_at, state_s)))
79 } else {
80 Ok(None)
81 }
82 })
83 .await?;
84
85 let Some((host, body, fetched_at, expires_at, state_s)) = row else {
86 return Ok(None);
87 };
88 let state = RobotsState::from_db(&state_s)?;
89 Ok(Some(RobotsEntry {
90 host,
91 body,
92 fetched_at,
93 expires_at,
94 state,
95 }))
96}
97
98pub async fn upsert(db: &Db, entry: RobotsEntry) -> Result<(), StorageError> {
99 let RobotsEntry {
100 host,
101 body,
102 fetched_at,
103 expires_at,
104 state,
105 } = entry;
106 let state_s = state.as_str().to_string();
107 db.conn
108 .call(move |c| {
109 c.execute(
110 "INSERT INTO robots_cache (host, body, fetched_at, expires_at, state) \
111 VALUES (?1, ?2, ?3, ?4, ?5) \
112 ON CONFLICT(host) DO UPDATE SET \
113 body=excluded.body, \
114 fetched_at=excluded.fetched_at, \
115 expires_at=excluded.expires_at, \
116 state=excluded.state",
117 rusqlite::params![host, body, fetched_at, expires_at, state_s],
118 )?;
119 Ok::<_, rusqlite::Error>(())
120 })
121 .await?;
122 Ok(())
123}
124
125pub async fn prune_expired(db: &Db, now: i64) -> Result<usize, StorageError> {
126 let removed = db
127 .conn
128 .call(move |c| {
129 let n = c.execute("DELETE FROM robots_cache WHERE expires_at < ?1", [now])?;
130 Ok::<_, rusqlite::Error>(n)
131 })
132 .await?;
133 Ok(removed)
134}
135
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::{tempdir, TempDir};

    /// Opens a database inside a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned together with the `Db` because dropping
    /// the guard deletes the directory; callers must keep it bound (for
    /// example as `_tmp`) for as long as they use the database.
    async fn fresh_db() -> (TempDir, Db) {
        let tmp = tempdir().unwrap();
        let db = Db::open(tmp.path().join("rover.db")).await.unwrap();
        (tmp, db)
    }

    /// Every state survives the `as_str` -> `from_db` round trip, and an
    /// unrecognised string is rejected.
    #[test]
    fn state_round_trips_through_db_strings() {
        for state in [
            RobotsState::Parsed,
            RobotsState::AllowAll,
            RobotsState::DisallowAll,
        ] {
            assert_eq!(RobotsState::from_db(state.as_str()).unwrap(), state);
        }
        assert!(RobotsState::from_db("bogus").is_err());
    }

    /// A stored entry with a body reads back unchanged.
    #[tokio::test]
    async fn upsert_and_lookup_round_trip_parsed() {
        let (_tmp, db) = fresh_db().await;
        let entry = RobotsEntry {
            host: "example.com".into(),
            body: Some("User-agent: *\nDisallow: /admin".into()),
            fetched_at: 1_000,
            expires_at: 1_000 + 86_400,
            state: RobotsState::Parsed,
        };
        upsert(&db, entry.clone()).await.unwrap();
        let got = lookup(&db, "example.com").await.unwrap();
        assert_eq!(got.as_ref(), Some(&entry));
    }

    /// Looking up a host that was never stored yields `None`, not an error.
    #[tokio::test]
    async fn lookup_unknown_host_returns_none() {
        let (_tmp, db) = fresh_db().await;
        assert_eq!(lookup(&db, "absent.example").await.unwrap(), None);
    }

    /// A second upsert for the same host replaces the first row.
    #[tokio::test]
    async fn upsert_overwrites_existing_row() {
        let (_tmp, db) = fresh_db().await;
        let one = RobotsEntry {
            host: "example.com".into(),
            body: Some("v1".into()),
            fetched_at: 1_000,
            expires_at: 2_000,
            state: RobotsState::Parsed,
        };
        let two = RobotsEntry {
            body: Some("v2".into()),
            ..one.clone()
        };
        upsert(&db, one).await.unwrap();
        upsert(&db, two.clone()).await.unwrap();
        assert_eq!(lookup(&db, "example.com").await.unwrap(), Some(two));
    }

    /// An entry stored without a body reads back with `body == None`.
    #[tokio::test]
    async fn allow_all_sentinel_has_no_body() {
        let (_tmp, db) = fresh_db().await;
        let entry = RobotsEntry {
            host: "404.example".into(),
            body: None,
            fetched_at: 1_000,
            expires_at: 1_000 + 86_400,
            state: RobotsState::AllowAll,
        };
        upsert(&db, entry.clone()).await.unwrap();
        let got = lookup(&db, "404.example").await.unwrap();
        assert_eq!(got, Some(entry));
    }

    /// Pruning removes only rows whose expiry lies before the cut-off.
    #[tokio::test]
    async fn prune_expired_removes_old_rows_only() {
        let (_tmp, db) = fresh_db().await;
        upsert(
            &db,
            RobotsEntry {
                host: "old.example".into(),
                body: Some("x".into()),
                fetched_at: 100,
                expires_at: 200,
                state: RobotsState::Parsed,
            },
        )
        .await
        .unwrap();
        upsert(
            &db,
            RobotsEntry {
                host: "new.example".into(),
                body: Some("y".into()),
                fetched_at: 100,
                expires_at: 10_000,
                state: RobotsState::Parsed,
            },
        )
        .await
        .unwrap();
        let pruned = prune_expired(&db, 500).await.unwrap();
        assert_eq!(pruned, 1);
        assert!(lookup(&db, "old.example").await.unwrap().is_none());
        assert!(lookup(&db, "new.example").await.unwrap().is_some());
    }
}