Skip to main content

argus_frontier/
in_memory.rs

1use std::collections::VecDeque;
2use std::sync::Arc;
3use tokio::sync::Mutex;
4
5use async_trait::async_trait;
6
7use argus_common::CrawlJob;
8
9use crate::frontier::Frontier;
10
11#[derive(Clone, Default)]
12pub struct InMemoryFrontier {
13    queue: Arc<Mutex<VecDeque<CrawlJob>>>,
14}
15
16#[async_trait]
17impl Frontier for InMemoryFrontier {
18    async fn push(&self, job: CrawlJob) {
19        let mut q = self.queue.lock().await;
20        q.push_back(job);
21    }
22
23    async fn pop(&self) -> Option<CrawlJob> {
24        let mut q = self.queue.lock().await;
25        q.pop_front()
26    }
27}
28
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `CrawlJob` for `url` at the given `depth`.
    ///
    /// The normalized URL is set to the same text as the raw URL and the host
    /// is always `example.com`; the tests below only inspect `url`.
    fn job(url: &str, depth: u16) -> CrawlJob {
        let raw = url.to_string();
        CrawlJob {
            normalized_url: raw.clone(),
            url: raw,
            host: "example.com".to_string(),
            depth,
        }
    }

    /// Jobs must come back out in the order they were pushed.
    #[tokio::test]
    async fn push_pop_fifo_order() {
        let queue = InMemoryFrontier::default();
        queue.push(job("https://a.com", 0)).await;
        queue.push(job("https://b.com", 1)).await;

        // Pop both jobs before asserting so a failure shows the full order.
        let popped = [
            queue.pop().await.unwrap().url,
            queue.pop().await.unwrap().url,
        ];
        assert_eq!(popped, ["https://a.com", "https://b.com"]);
    }

    /// Popping from a freshly created frontier yields nothing.
    #[tokio::test]
    async fn pop_empty_returns_none() {
        let queue = InMemoryFrontier::default();
        let popped = queue.pop().await;
        assert!(popped.is_none());
    }
}