Skip to main content

rover/storage/
robots.rs

//! Async API over the `robots_cache` table.
//!
//! Mirrors `storage::pages` in shape: opaque row struct, lookup by primary key,
//! upsert, prune. The `state` column tracks one of `parsed`, `allow_all`, or
//! `disallow_all` per the M5 design spec.
6
7use crate::storage::{Db, StorageError};
8
9use super::StringErr;
10
11/// One row from `robots_cache`. The `state` discriminator is a string at the
12/// storage edge so SQL migrations don't have to know about Rust enums.
13#[derive(Debug, Clone, PartialEq, Eq)]
14pub struct RobotsEntry {
15    pub host: String,
16    pub body: Option<String>, // None for allow_all / disallow_all sentinels
17    pub fetched_at: i64,
18    pub expires_at: i64,
19    pub state: RobotsState,
20}
21
/// Typed form of the `robots_cache.state` column.
///
/// Converted to and from its column text by `RobotsState::as_str` and
/// `RobotsState::from_db`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RobotsState {
    /// Stored as `"parsed"`.
    Parsed,
    /// Stored as `"allow_all"`; a sentinel row that carries no body.
    AllowAll,
    /// Stored as `"disallow_all"`; a sentinel row that carries no body.
    DisallowAll,
}
28
29impl RobotsState {
30    pub fn as_str(self) -> &'static str {
31        match self {
32            Self::Parsed => "parsed",
33            Self::AllowAll => "allow_all",
34            Self::DisallowAll => "disallow_all",
35        }
36    }
37
38    pub fn from_db(s: &str) -> Result<Self, StorageError> {
39        Ok(match s {
40            "parsed" => Self::Parsed,
41            "allow_all" => Self::AllowAll,
42            "disallow_all" => Self::DisallowAll,
43            other => {
44                // tokio_rusqlite 0.7's `Error` enum has no `Other` variant
45                // (only `ConnectionClosed`, `Close`, `Error(rusqlite::Error)`),
46                // so we wrap a synthetic `rusqlite::Error::FromSqlConversionFailure`
47                // — semantically correct here since we're failing to map a SQL
48                // text value back to a Rust enum. Column index 4 matches the
49                // `state` position in `lookup`'s SELECT projection.
50                return Err(StorageError::Backend(tokio_rusqlite::Error::Error(
51                    rusqlite::Error::FromSqlConversionFailure(
52                        4,
53                        rusqlite::types::Type::Text,
54                        Box::new(StringErr(format!("unknown robots_cache.state = {other}"))),
55                    ),
56                )));
57            }
58        })
59    }
60}
61
62pub async fn lookup(db: &Db, host: &str) -> Result<Option<RobotsEntry>, StorageError> {
63    let host = host.to_string();
64    let row = db
65        .conn
66        .call(move |c| {
67            let mut stmt = c.prepare(
68                "SELECT host, body, fetched_at, expires_at, state \
69                 FROM robots_cache WHERE host = ?1",
70            )?;
71            let mut rows = stmt.query([&host])?;
72            if let Some(r) = rows.next()? {
73                let host: String = r.get(0)?;
74                let body: Option<String> = r.get(1)?;
75                let fetched_at: i64 = r.get(2)?;
76                let expires_at: i64 = r.get(3)?;
77                let state_s: String = r.get(4)?;
78                Ok::<_, rusqlite::Error>(Some((host, body, fetched_at, expires_at, state_s)))
79            } else {
80                Ok(None)
81            }
82        })
83        .await?;
84
85    let Some((host, body, fetched_at, expires_at, state_s)) = row else {
86        return Ok(None);
87    };
88    let state = RobotsState::from_db(&state_s)?;
89    Ok(Some(RobotsEntry {
90        host,
91        body,
92        fetched_at,
93        expires_at,
94        state,
95    }))
96}
97
98pub async fn upsert(db: &Db, entry: RobotsEntry) -> Result<(), StorageError> {
99    let RobotsEntry {
100        host,
101        body,
102        fetched_at,
103        expires_at,
104        state,
105    } = entry;
106    let state_s = state.as_str().to_string();
107    db.conn
108        .call(move |c| {
109            c.execute(
110                "INSERT INTO robots_cache (host, body, fetched_at, expires_at, state) \
111                 VALUES (?1, ?2, ?3, ?4, ?5) \
112                 ON CONFLICT(host) DO UPDATE SET \
113                    body=excluded.body, \
114                    fetched_at=excluded.fetched_at, \
115                    expires_at=excluded.expires_at, \
116                    state=excluded.state",
117                rusqlite::params![host, body, fetched_at, expires_at, state_s],
118            )?;
119            Ok::<_, rusqlite::Error>(())
120        })
121        .await?;
122    Ok(())
123}
124
125pub async fn prune_expired(db: &Db, now: i64) -> Result<usize, StorageError> {
126    let removed = db
127        .conn
128        .call(move |c| {
129            let n = c.execute("DELETE FROM robots_cache WHERE expires_at < ?1", [now])?;
130            Ok::<_, rusqlite::Error>(n)
131        })
132        .await?;
133    Ok(removed)
134}
135
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::{tempdir, TempDir};

    /// Opens a brand-new database inside its own temporary directory.
    ///
    /// The `TempDir` guard is returned together with the handle because
    /// dropping the guard deletes the directory. Callers must keep it bound to
    /// a named variable (`_tmp`, not `_`) for as long as they use the `Db`.
    /// It comes first in the tuple so that `let (_tmp, db) = …` drops `db`
    /// before the directory is removed.
    async fn fresh_db() -> (TempDir, Db) {
        let tmp = tempdir().unwrap();
        let db = Db::open(tmp.path().join("rover.db")).await.unwrap();
        (tmp, db)
    }

    #[test]
    fn state_text_round_trips_and_rejects_unknown() {
        for state in [
            RobotsState::Parsed,
            RobotsState::AllowAll,
            RobotsState::DisallowAll,
        ] {
            assert_eq!(RobotsState::from_db(state.as_str()).unwrap(), state);
        }
        assert!(RobotsState::from_db("bogus").is_err());
    }

    #[tokio::test]
    async fn upsert_and_lookup_round_trip_parsed() {
        let (_tmp, db) = fresh_db().await;
        let entry = RobotsEntry {
            host: "example.com".into(),
            body: Some("User-agent: *\nDisallow: /admin".into()),
            fetched_at: 1_000,
            expires_at: 1_000 + 86_400,
            state: RobotsState::Parsed,
        };
        upsert(&db, entry.clone()).await.unwrap();
        let got = lookup(&db, "example.com").await.unwrap();
        assert_eq!(got.as_ref(), Some(&entry));
    }

    #[tokio::test]
    async fn lookup_unknown_host_returns_none() {
        let (_tmp, db) = fresh_db().await;
        assert_eq!(lookup(&db, "absent.example").await.unwrap(), None);
    }

    #[tokio::test]
    async fn upsert_overwrites_existing_row() {
        let (_tmp, db) = fresh_db().await;
        let one = RobotsEntry {
            host: "example.com".into(),
            body: Some("v1".into()),
            fetched_at: 1_000,
            expires_at: 2_000,
            state: RobotsState::Parsed,
        };
        let two = RobotsEntry {
            body: Some("v2".into()),
            ..one.clone()
        };
        upsert(&db, one).await.unwrap();
        upsert(&db, two.clone()).await.unwrap();
        assert_eq!(lookup(&db, "example.com").await.unwrap(), Some(two));
    }

    #[tokio::test]
    async fn allow_all_sentinel_has_no_body() {
        let (_tmp, db) = fresh_db().await;
        let entry = RobotsEntry {
            host: "404.example".into(),
            body: None,
            fetched_at: 1_000,
            expires_at: 1_000 + 86_400,
            state: RobotsState::AllowAll,
        };
        upsert(&db, entry.clone()).await.unwrap();
        let got = lookup(&db, "404.example").await.unwrap();
        assert_eq!(got, Some(entry));
    }

    #[tokio::test]
    async fn prune_expired_removes_old_rows_only() {
        let (_tmp, db) = fresh_db().await;
        upsert(
            &db,
            RobotsEntry {
                host: "old.example".into(),
                body: Some("x".into()),
                fetched_at: 100,
                expires_at: 200,
                state: RobotsState::Parsed,
            },
        )
        .await
        .unwrap();
        upsert(
            &db,
            RobotsEntry {
                host: "new.example".into(),
                body: Some("y".into()),
                fetched_at: 100,
                expires_at: 10_000,
                state: RobotsState::Parsed,
            },
        )
        .await
        .unwrap();
        let pruned = prune_expired(&db, 500).await.unwrap();
        assert_eq!(pruned, 1);
        assert!(lookup(&db, "old.example").await.unwrap().is_none());
        assert!(lookup(&db, "new.example").await.unwrap().is_some());
    }
}